The FreeRADIUS server $Id: 15bac2a4c627c01d1aa2047687b3418955ac7f00 $
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15 */
16
17/** Wrappers around various regular expression libraries
18 *
19 * @file src/lib/util/regex.c
20 *
21 * @copyright 2014 The FreeRADIUS server project
22 * @copyright 2014 Arran Cudbard-Bell (a.cudbardb@freeradius.org)
23 */
24RCSID("$Id: ecec9010400c9be79a070b6c6380a57e34b0e64e $")
25
26#ifdef HAVE_REGEX
27
28#include <freeradius-devel/util/regex.h>
29#include <freeradius-devel/util/atexit.h>
30
31#if defined(HAVE_REGEX_PCRE2) && defined(PCRE2_CONFIG_JIT)
32#ifndef FR_PCRE_JIT_STACK_MIN
33# define FR_PCRE_JIT_STACK_MIN (128 * 1024)
34#endif
35#ifndef FR_PCRE_JIT_STACK_MAX
36# define FR_PCRE_JIT_STACK_MAX (512 * 1024)
37#endif
38#endif
39
40const fr_sbuff_escape_rules_t regex_escape_rules = {
41 .name = "regex",
42 .chr = '\\',
43 .subs = {
44 ['$'] = '$',
45 ['('] = '(',
46 ['*'] = '*',
47 ['+'] = '+',
48 ['.'] = '.',
49 ['/'] = '/',
50 ['?'] = '?',
51 ['['] = '[',
52 ['\\'] = '\\',
53 ['^'] = '^',
54 ['`'] = '`',
55 ['|'] = '|',
56 ['\a'] = 'a',
57 ['\b'] = 'b',
58 ['\n'] = 'n',
59 ['\r'] = 'r',
60 ['\t'] = 't',
61 ['\v'] = 'v'
62 },
63 .esc = {
66 },
67 .do_utf8 = true,
68 .do_oct = true
69};
70
71
72/*
73 *######################################
74 *# FUNCTIONS FOR LIBPCRE2 #
75 *######################################
76 */
77#ifdef HAVE_REGEX_PCRE2
78/*
79 * Wrapper functions for libpcre2. Much more powerful, and guaranteed
80 * to be binary safe for both patterns and subjects but require
81 * libpcre2.
82 */
83
84/** Thread local storage for PCRE2
85 *
86 * Not all this storage is thread local, but it simplifies cleanup if
87 * we bind its lifetime to the thread, and lets us get away with not
88 * having specific init/free functions.
89 */
90typedef struct {
91 TALLOC_CTX *alloc_ctx; //!< Context used for any allocations.
92 pcre2_general_context *gcontext; //!< General context.
93 pcre2_compile_context *ccontext; //!< Compile context.
94 pcre2_match_context *mcontext; //!< Match context.
95#ifdef PCRE2_CONFIG_JIT
96 pcre2_jit_stack *jit_stack; //!< Jit stack for executing jit'd patterns.
97 bool do_jit; //!< Whether we have runtime JIT support.
98#endif
99} fr_pcre2_tls_t;
100
101/** Thread local storage for pcre2
102 *
103 */
104static _Thread_local fr_pcre2_tls_t *fr_pcre2_tls;
105
106/** Talloc wrapper for pcre2 memory allocation
107 *
108 * @param[in] to_alloc How many bytes to alloc.
109 * @param[in] uctx UNUSED.
110 */
111static void *_pcre2_talloc(PCRE2_SIZE to_alloc, UNUSED void *uctx)
112{
113 return talloc_array(fr_pcre2_tls->alloc_ctx, uint8_t, to_alloc);
114}
115
116/** Talloc wrapper for pcre2 memory freeing
117 *
118 * @param[in] to_free Memory to free.
119 * @param[in] uctx UNUSED.
120 */
121static void _pcre2_talloc_free(void *to_free, UNUSED void *uctx)
122{
123 talloc_free(to_free);
124}
125
126/** Free thread local data
127 *
128 * @param[in] tls Thread local data to free.
129 */
130static int _pcre2_tls_free(fr_pcre2_tls_t *tls)
131{
132 if (tls->gcontext) pcre2_general_context_free(tls->gcontext);
133 if (tls->ccontext) pcre2_compile_context_free(tls->ccontext);
134 if (tls->mcontext) pcre2_match_context_free(tls->mcontext);
135#ifdef PCRE2_CONFIG_JIT
136 if (tls->jit_stack) pcre2_jit_stack_free(tls->jit_stack);
137#endif
138
139 return 0;
140}
141
/** Thread exit callback, frees the thread local PCRE2 state
 *
 * @param[in] arg	The fr_pcre2_tls_t registered with fr_atexit_thread_local.
 * @return the result of talloc_free.
 */
static int _pcre2_tls_free_on_exit(void *arg)
{
	int rcode = talloc_free(arg);

	return rcode;
}
146
147/** Thread local init for pcre2
148 *
149 */
150static int fr_pcre2_tls_init(void)
151{
152 fr_pcre2_tls_t *tls;
153
154 if (unlikely(fr_pcre2_tls != NULL)) return 0;
155
156 fr_pcre2_tls = tls = talloc_zero(NULL, fr_pcre2_tls_t);
157 if (!tls) return -1;
158 talloc_set_destructor(tls, _pcre2_tls_free);
159
160 tls->gcontext = pcre2_general_context_create(_pcre2_talloc, _pcre2_talloc_free, NULL);
161 if (!tls->gcontext) {
162 fr_strerror_const("Failed allocating general context");
163 return -1;
164 }
165
166 tls->ccontext = pcre2_compile_context_create(tls->gcontext);
167 if (!tls->ccontext) {
168 fr_strerror_const("Failed allocating compile context");
169 error:
170 fr_pcre2_tls = NULL;
171 _pcre2_tls_free(tls);
172 return -1;
173 }
174
175 tls->mcontext = pcre2_match_context_create(tls->gcontext);
176 if (!tls->mcontext) {
177 fr_strerror_const("Failed allocating match context");
178 goto error;
179 }
180
181#ifdef PCRE2_CONFIG_JIT
182 pcre2_config(PCRE2_CONFIG_JIT, &tls->do_jit);
183 if (tls->do_jit) {
184 tls->jit_stack = pcre2_jit_stack_create(FR_PCRE_JIT_STACK_MIN, FR_PCRE_JIT_STACK_MAX, tls->gcontext);
185 if (!tls->jit_stack) {
186 fr_strerror_const("Failed allocating JIT stack");
187 goto error;
188 }
189 pcre2_jit_stack_assign(tls->mcontext, NULL, tls->jit_stack);
190 }
191#endif
192
193 /*
194 * Free on thread exit
195 */
196 fr_atexit_thread_local(fr_pcre2_tls, _pcre2_tls_free_on_exit, tls);
197 fr_pcre2_tls = tls; /* Assign to thread local storage */
198
199 return 0;
200}
201
202/** Free regex_t structure
203 *
204 * Calls libpcre specific free functions for the expression and study.
205 *
206 * @param preg to free.
207 */
208static int _regex_free(regex_t *preg)
209{
210 if (preg->compiled) pcre2_code_free(preg->compiled);
211
212 return 0;
213}
214
215/** Wrapper around pcre2_compile
216 *
217 * Allows the rest of the code to do compilations using one function signature.
218 *
219 * @note Compiled expression must be freed with talloc_free.
220 *
221 * @param[out] out Where to write out a pointer to the structure containing
222 * the compiled expression.
223 * @param[in] pattern to compile.
224 * @param[in] len of pattern.
225 * @param[in] flags controlling matching. May be NULL.
226 * @param[in] subcaptures Whether to compile the regular expression to store subcapture
227 * data.
228 * @param[in] runtime If false run the pattern through the PCRE JIT (if available)
229 * to convert it to machine code. This trades startup time (longer)
230 * for runtime performance (better).
231 * @return
232 * - >= 1 on success.
233 * - <= 0 on error. Negative value is offset of parse error.
234 */
235ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
236 fr_regex_flags_t const *flags, bool subcaptures, bool runtime)
237{
238 int ret;
239 PCRE2_SIZE offset;
240 uint32_t cflags = 0;
241 regex_t *preg;
242
243 /*
244 * Check inputs
245 */
246 *out = NULL;
247
248 /*
249 * Thread local initialisation
250 */
251 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
252
253 if (len == 0) {
254 fr_strerror_const("Empty expression");
255 return 0;
256 }
257
258 /*
259 * Options
260 */
261 if (flags) {
262 /* flags->global implemented by substitution function */
263 if (flags->ignore_case) cflags |= PCRE2_CASELESS;
264 if (flags->multiline) cflags |= PCRE2_MULTILINE;
265 if (flags->dot_all) cflags |= PCRE2_DOTALL;
266 if (flags->unicode) cflags |= PCRE2_UTF;
267 if (flags->extended) cflags |= PCRE2_EXTENDED;
268 }
269
270 if (!subcaptures) cflags |= PCRE2_NO_AUTO_CAPTURE;
271
272 preg = talloc_zero(ctx, regex_t);
273 talloc_set_destructor(preg, _regex_free);
274
275 preg->compiled = pcre2_compile((PCRE2_SPTR8)pattern, len,
276 cflags, &ret, &offset, fr_pcre2_tls->ccontext);
277 if (!preg->compiled) {
278 PCRE2_UCHAR errbuff[128];
279
280 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
281 fr_strerror_printf("%s", (char *)errbuff);
282 talloc_free(preg);
283
284 return -(ssize_t)offset;
285 }
286
287 if (!runtime) {
288 preg->precompiled = true;
289
290#ifdef PCRE2_CONFIG_JIT
291 /*
292 * This is expensive, so only do it for
293 * expressions that are going to be
294 * evaluated repeatedly.
295 */
296 if (fr_pcre2_tls->do_jit) {
297 ret = pcre2_jit_compile(preg->compiled, PCRE2_JIT_COMPLETE);
298 if (ret < 0) {
299 PCRE2_UCHAR errbuff[128];
300
301 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
302 fr_strerror_printf("Pattern JIT failed: %s", (char *)errbuff);
303 talloc_free(preg);
304
305 return 0;
306 }
307 preg->jitd = true;
308 }
309#endif
310 }
311
312 *out = preg;
313
314 return len;
315}
316
317/** Wrapper around pcre2_exec
318 *
319 * @param[in] preg The compiled expression.
320 * @param[in] subject to match.
321 * @param[in] len Length of subject.
322 * @param[in] regmatch Array of match pointers.
323 * @return
324 * - -1 on failure.
325 * - 0 on no match.
326 * - 1 on match.
327 */
328int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
329{
330 int ret;
331 uint32_t options = 0;
332
333 char *our_subject = NULL;
334 bool dup_subject = true;
335 pcre2_match_data *match_data;
336
337 /*
338 * Thread local initialisation
339 */
340 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
341
342 if (regmatch) {
343#ifdef PCRE2_COPY_MATCHED_SUBJECT
344 /*
345 * This is apparently only supported for pcre2_match
346 * NOT pcre2_jit_match.
347 */
348# ifdef PCRE2_CONFIG_JIT
349 if (!preg->jitd) {
350# endif
351 dup_subject = false;
352
353 /*
354 * If PCRE2_COPY_MATCHED_SUBJECT is available
355 * and set as an options flag, pcre2_match will
356 * strdup the subject string if pcre2_match is
357 * successful and store a pointer to it in the
358 * regmatch struct.
359 *
360 * The lifetime of the string memory will be
361 * bound to the regmatch struct. This is more
362 * efficient that doing it ourselves, as the
363 * strdup only occurs if the subject matches.
364 */
365 options |= PCRE2_COPY_MATCHED_SUBJECT;
366# ifdef PCRE2_CONFIG_JIT
367 }
368# endif
369#endif
370 if (dup_subject) {
371 /*
372 * We have to dup and operate on the duplicate
373 * of the subject, because pcre2_jit_match and
374 * pcre2_match store a pointer to the subject
375 * in the regmatch structure.
376 */
377 subject = our_subject = talloc_bstrndup(regmatch, subject, len);
378 if (!subject) {
379 fr_strerror_const("Out of memory");
380 return -1;
381 }
382#ifndef NDEBUG
383 regmatch->subject = subject; /* Stored only for tracking memory issues */
384#endif
385 }
386 }
387
388 /*
389 * If we weren't given match data we
390 * need to alloc it else pcre2_match
391 * fails when passed NULL match data.
392 */
393 if (!regmatch) {
394 match_data = pcre2_match_data_create_from_pattern(preg->compiled, fr_pcre2_tls->gcontext);
395 if (!match_data) {
396 fr_strerror_const("Failed allocating temporary match data");
397 return -1;
398 }
399 } else {
400 match_data = regmatch->match_data;
401 }
402
403#ifdef PCRE2_CONFIG_JIT
404 if (preg->jitd) {
405 ret = pcre2_jit_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
406 match_data, fr_pcre2_tls->mcontext);
407 } else
408#endif
409 {
410 ret = pcre2_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
411 match_data, fr_pcre2_tls->mcontext);
412 }
413 if (!regmatch) pcre2_match_data_free(match_data);
414 if (ret < 0) {
415 PCRE2_UCHAR errbuff[128];
416
417 if (dup_subject) talloc_free(our_subject);
418
419 if (ret == PCRE2_ERROR_NOMATCH) {
420 if (regmatch) regmatch->used = 0;
421 return 0;
422 }
423
424 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
425 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
426
427 return -1;
428 }
429
430 if (regmatch) regmatch->used = ret;
431
432 return 1;
433}
434
435/** Wrapper around pcre2_substitute
436 *
437 * @param[in] ctx to allocate output string in.
438 * @param[out] out Output string with replacements performed.
439 * @param[in] max_out Maximum length of output buffer. If this is 0 then
440 * the output length is unlimited.
441 * @param[in] preg The compiled expression.
442 * @param[in] flags that affect matching.
443 * @param[in] subject to perform replacements on.
444 * @param[in] subject_len the length of the subject.
445 * @param[in] replacement replacement string containing substitution
446 * markers.
447 * @param[in] replacement_len Length of the replacement string.
448 * @param[in] regmatch Array of match pointers.
449 * @return
450 * - >= 0 the length of the output string.
451 * - < 0 on error.
452 */
453int regex_substitute(TALLOC_CTX *ctx, char **out, size_t max_out, regex_t *preg, fr_regex_flags_t const *flags,
454 char const *subject, size_t subject_len,
455 char const *replacement, size_t replacement_len,
456 fr_regmatch_t *regmatch)
457{
458 int ret;
459 uint32_t options = 0;
460 size_t buff_len, actual_len;
461 char *buff;
462
463#ifndef PCRE2_COPY_MATCHED_SUBJECT
464 char *our_subject = NULL;
465#endif
466
467 /*
468 * Thread local initialisation
469 */
470 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
471
472 /*
473 * Internally pcre2_substitute just calls pcre2_match to
474 * generate the match data, so the same hack as the
475 * regex_exec function above is required.
476 */
477 if (regmatch) {
478#ifndef PCRE2_COPY_MATCHED_SUBJECT
479 /*
480 * We have to dup and operate on the duplicate
481 * of the subject, because pcre2_jit_match and
482 * pcre2_match store a pointer to the subject
483 * in the regmatch structure.
484 */
485 subject = our_subject = talloc_bstrndup(regmatch, subject, subject_len);
486 if (!subject) {
487 fr_strerror_const("Out of memory");
488 return -1;
489 }
490#else
491 /*
492 * If PCRE2_COPY_MATCHED_SUBJECT is available
493 * and set as an options flag, pcre2_match will
494 * strdup the subject string if pcre2_match is
495 * successful and store a pointer to it in the
496 * regmatch struct.
497 *
498 * The lifetime of the string memory will be
499 * bound to the regmatch struct. This is more
500 * efficient that doing it ourselves, as the
501 * strdup only occurs if the subject matches.
502 */
503 options |= PCRE2_COPY_MATCHED_SUBJECT;
504#endif
505 }
506
507 /*
508 * Guess (badly) what the length of the output buffer should be
509 */
510 actual_len = buff_len = subject_len + 1; /* +1 for the \0 */
511 buff = talloc_array(ctx, char, buff_len);
512 if (!buff) {
513#ifndef PCRE2_COPY_MATCHED_SUBJECT
514 talloc_free(our_subject);
515#endif
516 fr_strerror_const("Out of memory");
517 return -1;
518 }
519
520 options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
521 if (flags->global) options |= PCRE2_SUBSTITUTE_GLOBAL;
522
523again:
524 /*
525 * actual_len input value should be the size of the
526 * buffer including space for '\0'.
527 * If input buffer is too small, then actual_len will be set
528 * to the buffer space needed including space for '\0'.
529 * If input buffer is the correct size, then actual_len
530 * will be set to the size of the string written to buff
531 * without the terminating '\0'.
532 */
533 ret = pcre2_substitute(preg->compiled,
534 (PCRE2_SPTR8)subject, (PCRE2_SIZE)subject_len, 0,
535 options, NULL, fr_pcre2_tls->mcontext,
536 (PCRE2_UCHAR const *)replacement, replacement_len, (PCRE2_UCHAR *)buff, &actual_len);
537
538 if (ret < 0) {
539 PCRE2_UCHAR errbuff[128];
540
541#ifndef PCRE2_COPY_MATCHED_SUBJECT
542 talloc_free(our_subject);
543#endif
545
546 if (ret == PCRE2_ERROR_NOMEMORY) {
547 if ((max_out > 0) && (actual_len > max_out)) {
548 fr_strerror_printf("String length with substitutions (%zu) "
549 "exceeds max string length (%zu)", actual_len - 1, max_out - 1);
550 return -1;
551 }
552
553 /*
554 * Check that actual_len != buff_len as that'd be
555 * an actual error.
556 */
557 if (actual_len == buff_len) {
558 fr_strerror_const("libpcre2 out of memory");
559 return -1;
560 }
561 buff_len = actual_len; /* The length we get passed back includes the \0 */
562 buff = talloc_array(ctx, char, buff_len);
563 goto again;
564 }
565
566 if (ret == PCRE2_ERROR_NOMATCH) {
567 if (regmatch) regmatch->used = 0;
568 return 0;
569 }
570
571 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
572 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
573 return -1;
574 }
575
576 /*
577 * Trim the replacement buffer to the correct length
578 *
579 * buff_len includes \0.
580 * ...and as pcre2_substitute just succeeded actual_len does not include \0.
581 */
582 if (actual_len < (buff_len - 1)) {
583 buff = talloc_bstr_realloc(ctx, buff, actual_len);
584 if (!buff) {
585 fr_strerror_const("reallocing pcre2_substitute result buffer failed");
586 return -1;
587 }
588 }
589
590 if (regmatch) regmatch->used = ret;
591 *out = buff;
592
593 return 1;
594}
595
596
597/** Returns the number of subcapture groups
598 *
599 * @return
600 * - >0 The number of subcaptures contained within the pattern
601 * - 0 if the number of subcaptures can't be determined.
602 */
603uint32_t regex_subcapture_count(regex_t const *preg)
604{
606
607 if (pcre2_pattern_info(preg->compiled, PCRE2_INFO_CAPTURECOUNT, &count) != 0) {
608 fr_strerror_const("Error determining subcapture group count");
609 return 0;
610 }
611
612 return count + 1;
613}
614
615/** Free libpcre2's matchdata
616 *
617 * @note Don't call directly, will be called if talloc_free is called on a #regmatch_t.
618 */
619static int _pcre2_match_data_free(fr_regmatch_t *regmatch)
620{
621 pcre2_match_data_free(regmatch->match_data);
622 return 0;
623}
624
625/** Allocate vectors to fill with match data
626 *
627 * @param[in] ctx to allocate match vectors in.
628 * @param[in] count The number of vectors to allocate.
629 * @return
630 * - NULL on error.
631 * - Array of match vectors.
632 */
633fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
634{
635 fr_regmatch_t *regmatch;
636
637 /*
638 * Thread local initialisation
639 */
640 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return NULL;
641
642 regmatch = talloc(ctx, fr_regmatch_t);
643 if (!regmatch) {
644 oom:
645 fr_strerror_const("Out of memory");
646 return NULL;
647 }
648
649 regmatch->match_data = pcre2_match_data_create(count, fr_pcre2_tls->gcontext);
650 if (!regmatch->match_data) {
651 talloc_free(regmatch);
652 goto oom;
653 }
654 talloc_set_type(regmatch->match_data, pcre2_match_data);
655
656 talloc_set_destructor(regmatch, _pcre2_match_data_free);
657
658 return regmatch;
659}
660
661/*
662 *######################################
663 *# FUNCTIONS FOR POSIX-REGEX #
664 *######################################
665 */
666# else
667/*
668 * Wrapper functions for POSIX like, and extended regular
669 * expressions. These use the system regex library.
670 */
671
/** Talloc destructor for a heap allocated POSIX regex_t
 *
 * The regex_t is heap allocated so that regex_compile can share one
 * signature between the POSIX and libpcre implementations.
 *
 * @param preg	being freed.
 * @return 0 always.
 */
static int _regex_free(regex_t *preg)
{
	regfree(preg);	/* Release whatever regcomp attached to the structure */

	return 0;
}
685
686/** Binary safe wrapper around regcomp
687 *
688 * If we have the BSD extensions we don't need to do any special work
689 * if we don't have the BSD extensions we need to check to see if the
690 * regular expression contains any \0 bytes.
691 *
692 * If it does we fail and print the appropriate error message.
693 *
694 * @note Compiled expression must be freed with talloc_free.
695 *
696 * @param[in] ctx To allocate memory in.
697 * @param[out] out Where to write out a pointer
698 * to the structure containing the compiled expression.
699 * @param[in] pattern to compile.
700 * @param[in] len of pattern.
701 * @param[in] flags controlling matching. May be NULL.
702 * @param[in] subcaptures Whether to compile the regular expression
703 * to store subcapture data.
704 * @param[in] runtime Whether the compilation is being done at runtime.
705 * @return
706 * - >= 1 on success.
707 * - <= 0 on error. Negative value is offset of parse error.
708 * With POSIX regex we only give the correct offset for embedded \0 errors.
709 */
710ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
711 fr_regex_flags_t const *flags, bool subcaptures, UNUSED bool runtime)
712{
713 int ret;
714 int cflags = REG_EXTENDED;
715 regex_t *preg;
716
717 if (len == 0) {
718 fr_strerror_const("Empty expression");
719 return 0;
720 }
721
722 /*
723 * Options
724 */
725 if (flags) {
726 if (flags->global) {
727 fr_strerror_const("g - Global matching/substitution not supported with posix-regex");
728 return 0;
729 }
730 if (flags->dot_all) {
731 fr_strerror_const("s - Single line matching is not supported with posix-regex");
732 return 0;
733 }
734 if (flags->unicode) {
735 fr_strerror_const("u - Unicode matching not supported with posix-regex");
736 return 0;
737 }
738 if (flags->extended) {
739 fr_strerror_const("x - Whitespace and comments not supported with posix-regex");
740 return 0;
741 }
742
743 if (flags->ignore_case) cflags |= REG_ICASE;
744 if (flags->multiline) cflags |= REG_NEWLINE;
745 }
746
747
748 if (!subcaptures) cflags |= REG_NOSUB;
749
750#ifndef HAVE_REGNCOMP
751 {
752 char const *p;
753
754 p = pattern;
755 p += strlen(pattern);
756
757 if ((size_t)(p - pattern) != len) {
758 fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation",
759 (p - pattern));
760 return -(p - pattern);
761 }
762
763 preg = talloc_zero(ctx, regex_t);
764 if (!preg) return 0;
765
766 ret = regcomp(preg, pattern, cflags);
767 }
768#else
769 preg = talloc_zero(ctx, regex_t);
770 if (!preg) return 0;
771 ret = regncomp(preg, pattern, len, cflags);
772#endif
773 if (ret != 0) {
774 char errbuf[128];
775
776 regerror(ret, preg, errbuf, sizeof(errbuf));
777 fr_strerror_printf("%s", errbuf);
778
779 talloc_free(preg);
780
781 return 0; /* POSIX expressions don't give us the failure offset */
782 }
783
784 talloc_set_destructor(preg, _regex_free);
785 *out = preg;
786
787 return len;
788}
789
790/** Binary safe wrapper around regexec
791 *
792 * If we have the BSD extensions we don't need to do any special work
793 * If we don't have the BSD extensions we need to check to see if the
794 * value to be compared contains any \0 bytes.
795 *
796 * If it does, we fail and print the appropriate error message.
797 *
798 * @param[in] preg The compiled expression.
799 * @param[in] subject to match.
800 * @param[in] regmatch Match result structure.
801 * @return
802 * - -1 on failure.
803 * - 0 on no match.
804 * - 1 on match.
805 */
806int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
807{
808 int ret;
809 size_t matches;
810
811 /*
812 * Disable capturing
813 */
814 if (!regmatch) {
815 matches = 0;
816 } else {
817 matches = regmatch->allocd;
818
819 /*
820 * Reset the match result structure
821 */
822 memset(regmatch->match_data, 0, sizeof(regmatch->match_data[0]) * matches);
823 regmatch->used = 0;
824 }
825
826#ifndef HAVE_REGNEXEC
827 {
828 char const *p;
829
830 p = subject;
831 p += strlen(subject);
832
833 if ((size_t)(p - subject) != len) {
834 fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation",
835 (p - subject));
836 if (regmatch) regmatch->used = 0;
837 return -1;
838 }
839 ret = regexec(preg, subject, matches, regmatch ? regmatch->match_data : NULL, 0);
840 }
841#else
842 ret = regnexec(preg, subject, len, matches, regmatch ? regmatch->match_data : NULL, 0);
843#endif
844 if (ret != 0) {
845 if (ret != REG_NOMATCH) {
846 char errbuf[128];
847
848 regerror(ret, preg, errbuf, sizeof(errbuf));
849
850 fr_strerror_printf("regex evaluation failed: %s", errbuf);
851 return -1;
852 }
853 return 0;
854 }
855
856 /*
857 * Update regmatch->count to be the maximum number of
858 * groups that *could* have been populated as we don't
859 * have the number of matches.
860 */
861 if (regmatch) {
862 regmatch->used = preg->re_nsub + 1;
863
864 if (regmatch->subject) talloc_const_free(regmatch->subject);
865 regmatch->subject = talloc_bstrndup(regmatch, subject, len);
866 if (!regmatch->subject) {
867 fr_strerror_const("Out of memory");
868 return -1;
869 }
870 }
871 return 1;
872}
873
874/** Returns the number of subcapture groups
875 *
876 * @return
877 * - 0 we can't determine this for POSIX regular expressions.
878 */
879uint32_t regex_subcapture_count(UNUSED regex_t const *preg)
880{
881 return 0;
882}
883# endif
884
885# if defined(HAVE_REGEX_POSIX)
886/** Allocate vectors to fill with match data
887 *
888 * @param[in] ctx to allocate match vectors in.
889 * @param[in] count The number of vectors to allocate.
890 * @return
891 * - NULL on error.
892 * - Array of match vectors.
893 */
894fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
895{
896 fr_regmatch_t *regmatch;
897
898 /*
899 * Pre-allocate space for the match structure
900 * and for a 128b subject string.
901 */
902 regmatch = talloc_zero_pooled_object(ctx, fr_regmatch_t, 2, (sizeof(regmatch_t) * count) + 128);
903 if (unlikely(!regmatch)) {
904 error:
905 fr_strerror_const("Out of memory");
906 talloc_free(regmatch);
907 return NULL;
908 }
909 regmatch->match_data = talloc_array(regmatch, regmatch_t, count);
910 if (unlikely(!regmatch->match_data)) goto error;
911
912 regmatch->allocd = count;
913 regmatch->used = 0;
914 regmatch->subject = NULL;
915
916 return regmatch;
917}
918# endif
919
920/*
921 *########################################
922 *# UNIVERSAL FUNCTIONS #
923 *########################################
924 */
925
926/** Parse a string containing one or more regex flags
927 *
928 * @param[out] err May be NULL. If not NULL will be set to:
929 * - 0 on success.
930 * - -1 on unknown flag.
931 * - -2 on duplicate.
932 * @param[out] out Flag structure to populate. Must be initialised to zero
933 * if this is the first call to regex_flags_parse.
934 * @param[in] in Flag string to parse.
935 * @param[in] terminals Terminal characters. If parsing ends before the buffer
936 * is exhausted, and is pointing to one of these chars
937 * it's not considered an error.
938 * @param[in] err_on_dup Error if the flag is already set.
939 * @return
940 * - > 0 on success. The number of flag bytes parsed.
941 * - <= 0 on failure. Negative offset of first unrecognised flag.
942 */
943fr_slen_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in,
944 fr_sbuff_term_t const *terminals, bool err_on_dup)
945{
946 fr_sbuff_t our_in = FR_SBUFF(in);
947
948 if (err) *err = 0;
949
950 while (fr_sbuff_extend(&our_in)) {
951 switch (*our_in.p) {
952#define DO_REGEX_FLAG(_f, _c) \
953 case _c: \
954 if (err_on_dup && out->_f) { \
955 fr_strerror_printf("Duplicate regex flag '%c'", *our_in.p); \
956 if (err) *err = -2; \
957 FR_SBUFF_ERROR_RETURN(&our_in); \
958 } \
959 out->_f = 1; \
960 break
961
962 DO_REGEX_FLAG(global, 'g');
963 DO_REGEX_FLAG(ignore_case, 'i');
964 DO_REGEX_FLAG(multiline, 'm');
965 DO_REGEX_FLAG(dot_all, 's');
966 DO_REGEX_FLAG(unicode, 'u');
967 DO_REGEX_FLAG(extended, 'x');
968#undef DO_REGEX_FLAG
969
970 default:
971 if (fr_sbuff_is_terminal(&our_in, terminals)) FR_SBUFF_SET_RETURN(in, &our_in);
972
973 fr_strerror_printf("Unsupported regex flag '%c'", *our_in.p);
974 if (err) *err = -1;
975 FR_SBUFF_ERROR_RETURN(&our_in);
976 }
977 fr_sbuff_advance(&our_in, 1);
978 }
979 FR_SBUFF_SET_RETURN(in, &our_in);
980}
981
982/** Print the flags
983 *
984 * @param[out] sbuff where to write flags.
985 * @param[in] flags to print.
986 * @return
987 * - The number of bytes written to the out buffer.
988 * - A number >= outlen if truncation has occurred.
989 */
990ssize_t regex_flags_print(fr_sbuff_t *sbuff, fr_regex_flags_t const *flags)
991{
992 fr_sbuff_t our_sbuff = FR_SBUFF(sbuff);
993
994#define DO_REGEX_FLAG(_f, _c) \
995 if (flags->_f) FR_SBUFF_IN_CHAR_RETURN(&our_sbuff, _c)
996
997 DO_REGEX_FLAG(global, 'g');
998 DO_REGEX_FLAG(ignore_case, 'i');
999 DO_REGEX_FLAG(multiline, 'm');
1000 DO_REGEX_FLAG(dot_all, 's');
1001 DO_REGEX_FLAG(unicode, 'u');
1002 DO_REGEX_FLAG(extended, 'x');
1003#undef DO_REGEX_FLAG
1004
1005 FR_SBUFF_SET_RETURN(sbuff, &our_sbuff);
1006}
1007#endif
1008
1009/** Compare two boxes using an operator
1010 *
1011 * @todo - allow /foo/i on the RHS
1012 *
1013 * However, this involves allocating intermediate sbuffs for the
1014 * unescaped RHS, and all kinds of extra work. It's not overly hard,
1015 * but it's something we wish to avoid for now.
1016 *
1017 * @param[in] op to use in comparison. MUST be T_OP_REG_EQ or T_OP_REG_NE
1018 * @param[in] a Value to compare, MUST be FR_TYPE_STRING
1019 * @param[in] b uncompiled regex as FR_TYPE_STRING
1020 * @return
1021 * - 1 if true
1022 * - 0 if false
1023 * - -1 on failure.
1024 */
1026{
1027 int rcode;
1028 TALLOC_CTX *ctx = NULL;
1029 size_t lhs_len;
1030 char const *lhs;
1031 regex_t *regex = NULL;
1032
1033 if (!((op == T_OP_REG_EQ) || (op == T_OP_REG_NE))) {
1034 fr_strerror_const("Invalid operator for regex comparison");
1035 return -1;
1036 }
1037
1038 if (b->type != FR_TYPE_STRING) {
1039 fr_strerror_const("RHS must be regular expression");
1040 return -1;
1041 }
1042
1043 ctx = talloc_init_const("regex_cmp_op");
1044 if (!ctx) return -1;
1045
1046 if ((a->type != FR_TYPE_STRING) && (a->type != FR_TYPE_OCTETS)) {
1047 fr_slen_t slen;
1048 char *p;
1049
1050 slen = fr_value_box_aprint(ctx, &p, a, NULL); /* no escaping */
1051 if (slen < 0) return slen;
1052
1053 lhs = p;
1054 lhs_len = slen;
1055
1056 } else {
1057 lhs = a->vb_strvalue;
1058 lhs_len = a->vb_length;
1059 }
1060
1061 if (regex_compile(ctx, &regex, b->vb_strvalue, b->vb_length, NULL, false, true) < 0) {
1062 talloc_free(ctx);
1063 return -1;
1064 }
1065
1066#ifdef STATIC_ANALYZER
1067 if (!regex) {
1068 talloc_free(ctx);
1069 return -1;
1070 }
1071#endif
1072
1073 rcode = regex_exec(regex, lhs, lhs_len, NULL);
1074 talloc_free(ctx);
1075 if (rcode < 0) return rcode;
1076
1077 /*
1078 * Invert the sense of the rcode for !~
1079 */
1080 if (op == T_OP_REG_NE) rcode = (rcode == 0);
1081
1082 return rcode;
1083}
#define fr_atexit_thread_local(_name, _free, _uctx)
Definition atexit.h:221
#define RCSID(id)
Definition build.h:485
#define unlikely(_x)
Definition build.h:383
#define UNUSED
Definition build.h:317
static fr_slen_t err
Definition dict.h:841
static fr_slen_t in
Definition dict.h:841
talloc_free(reap)
@ FR_TYPE_STRING
String of printable characters.
@ FR_TYPE_OCTETS
Raw octets.
unsigned int uint32_t
long int ssize_t
unsigned char uint8_t
ssize_t fr_slen_t
bool fr_sbuff_is_terminal(fr_sbuff_t *in, fr_sbuff_term_t const *tt)
Efficient terminal string search.
Definition sbuff.c:2185
#define fr_sbuff_extend(_sbuff_or_marker)
#define FR_SBUFF_ERROR_RETURN(_sbuff_or_marker)
#define FR_SBUFF_SET_RETURN(_dst, _src)
#define SBUFF_CHAR_UNPRINTABLES_EXTENDED
#define FR_SBUFF(_sbuff_or_marker)
#define fr_sbuff_advance(_sbuff_or_marker, _len)
#define SBUFF_CHAR_UNPRINTABLES_LOW
Set of terminal elements.
static char buff[sizeof("18446744073709551615")+3]
Definition size_tests.c:41
return count
Definition module.c:155
char * talloc_bstr_realloc(TALLOC_CTX *ctx, char *in, size_t inlen)
Trim a bstr (char) buffer.
Definition talloc.c:650
char * talloc_bstrndup(TALLOC_CTX *ctx, char const *in, size_t inlen)
Binary safe strndup function.
Definition talloc.c:586
#define talloc_zero_pooled_object(_ctx, _type, _num_subobjects, _total_subobjects_size)
Definition talloc.h:177
static int talloc_const_free(void const *ptr)
Free const'd memory.
Definition talloc.h:229
static TALLOC_CTX * talloc_init_const(char const *name)
Allocate a top level chunk with a constant name.
Definition talloc.h:112
enum fr_token fr_token_t
@ T_OP_REG_EQ
Definition token.h:102
@ T_OP_REG_NE
Definition token.h:103
int fr_regex_cmp_op(fr_token_t op, fr_value_box_t const *a, fr_value_box_t const *b)
Compare two boxes using an operator.
Definition regex.c:1025
#define fr_strerror_printf(_fmt,...)
Log to thread local error buffer.
Definition strerror.h:64
#define fr_strerror_const(_msg)
Definition strerror.h:223
static fr_slen_t fr_value_box_aprint(TALLOC_CTX *ctx, char **out, fr_value_box_t const *data, fr_sbuff_escape_rules_t const *e_rules) 1(fr_value_box_print
static size_t char ** out
Definition value.h:1023