The FreeRADIUS server $Id: 15bac2a4c627c01d1aa2047687b3418955ac7f00 $
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15 */
16
17/** Wrappers around various regular expression libraries
18 *
19 * @file src/lib/util/regex.c
20 *
21 * @copyright 2014 The FreeRADIUS server project
22 * @copyright 2014 Arran Cudbard-Bell (a.cudbardb@freeradius.org)
23 */
24RCSID("$Id: 2fb107af701824686441dbb2adf022ff719cad02 $")
25
26#ifdef HAVE_REGEX
27
28#include <freeradius-devel/util/regex.h>
29
30#if defined(HAVE_REGEX_PCRE2) && defined(PCRE2_CONFIG_JIT)
31#ifndef FR_PCRE_JIT_STACK_MIN
32# define FR_PCRE_JIT_STACK_MIN (128 * 1024)
33#endif
34#ifndef FR_PCRE_JIT_STACK_MAX
35# define FR_PCRE_JIT_STACK_MAX (512 * 1024)
36#endif
37#endif
38
39const fr_sbuff_escape_rules_t regex_escape_rules = {
40 .name = "regex",
41 .chr = '\\',
42 .subs = {
43 ['$'] = '$',
44 ['('] = '(',
45 ['*'] = '*',
46 ['+'] = '+',
47 ['.'] = '.',
48 ['/'] = '/',
49 ['?'] = '?',
50 ['['] = '[',
51 ['\\'] = '\\',
52 ['^'] = '^',
53 ['`'] = '`',
54 ['|'] = '|',
55 ['\a'] = 'a',
56 ['\b'] = 'b',
57 ['\n'] = 'n',
58 ['\r'] = 'r',
59 ['\t'] = 't',
60 ['\v'] = 'v'
61 },
62 .esc = {
65 },
66 .do_utf8 = true,
67 .do_oct = true
68};
69
70
71/*
72 *######################################
73 *# FUNCTIONS FOR LIBPCRE2 #
74 *######################################
75 */
76#ifdef HAVE_REGEX_PCRE2
77/*
78 * Wrapper functions for libpcre2. Much more powerful, and guaranteed
79 * to be binary safe for both patterns and subjects but require
80 * libpcre2.
81 */
82
83/** Thread local storage for PCRE2
84 *
85 * Not all this storage is thread local, but it simplifies cleanup if
86 * we bind its lifetime to the thread, and lets us get away with not
87 * having specific init/free functions.
88 */
89typedef struct {
90 TALLOC_CTX *alloc_ctx; //!< Context used for any allocations.
91 pcre2_general_context *gcontext; //!< General context.
92 pcre2_compile_context *ccontext; //!< Compile context.
93 pcre2_match_context *mcontext; //!< Match context.
94#ifdef PCRE2_CONFIG_JIT
95 pcre2_jit_stack *jit_stack; //!< Jit stack for executing jit'd patterns.
96 bool do_jit; //!< Whether we have runtime JIT support.
97#endif
98} fr_pcre2_tls_t;
99
100/** Thread local storage for pcre2
101 *
102 */
103static _Thread_local fr_pcre2_tls_t *fr_pcre2_tls;
104
105/** Talloc wrapper for pcre2 memory allocation
106 *
107 * @param[in] to_alloc How many bytes to alloc.
108 * @param[in] uctx UNUSED.
109 */
110static void *_pcre2_talloc(PCRE2_SIZE to_alloc, UNUSED void *uctx)
111{
112 return talloc_array(fr_pcre2_tls->alloc_ctx, uint8_t, to_alloc);
113}
114
115/** Talloc wrapper for pcre2 memory freeing
116 *
117 * @param[in] to_free Memory to free.
118 * @param[in] uctx UNUSED.
119 */
120static void _pcre2_talloc_free(void *to_free, UNUSED void *uctx)
121{
122 talloc_free(to_free);
123}
124
125/** Free thread local data
126 *
127 * @param[in] tls Thread local data to free.
128 */
129static int _pcre2_tls_free(fr_pcre2_tls_t *tls)
130{
131 if (tls->gcontext) pcre2_general_context_free(tls->gcontext);
132 if (tls->ccontext) pcre2_compile_context_free(tls->ccontext);
133 if (tls->mcontext) pcre2_match_context_free(tls->mcontext);
134#ifdef PCRE2_CONFIG_JIT
135 if (tls->jit_stack) pcre2_jit_stack_free(tls->jit_stack);
136#endif
137
138 return 0;
139}
140
/** Thread exit callback which frees the thread local PCRE2 data
 *
 * @param[in] arg	The #fr_pcre2_tls_t registered with fr_atexit_thread_local.
 * @return the result of talloc_free.
 */
static int _pcre2_tls_free_on_exit(void *arg)
{
	int ret;

	ret = talloc_free(arg);

	return ret;
}
145
146/** Thread local init for pcre2
147 *
148 */
149static int fr_pcre2_tls_init(void)
150{
151 fr_pcre2_tls_t *tls;
152
153 if (unlikely(fr_pcre2_tls != NULL)) return 0;
154
155 fr_pcre2_tls = tls = talloc_zero(NULL, fr_pcre2_tls_t);
156 if (!tls) return -1;
157 talloc_set_destructor(tls, _pcre2_tls_free);
158
159 tls->gcontext = pcre2_general_context_create(_pcre2_talloc, _pcre2_talloc_free, NULL);
160 if (!tls->gcontext) {
161 fr_strerror_const("Failed allocating general context");
162 return -1;
163 }
164
165 tls->ccontext = pcre2_compile_context_create(tls->gcontext);
166 if (!tls->ccontext) {
167 fr_strerror_const("Failed allocating compile context");
168 error:
169 fr_pcre2_tls = NULL;
170 _pcre2_tls_free(tls);
171 return -1;
172 }
173
174 tls->mcontext = pcre2_match_context_create(tls->gcontext);
175 if (!tls->mcontext) {
176 fr_strerror_const("Failed allocating match context");
177 goto error;
178 }
179
180#ifdef PCRE2_CONFIG_JIT
181 pcre2_config(PCRE2_CONFIG_JIT, &tls->do_jit);
182 if (tls->do_jit) {
183 tls->jit_stack = pcre2_jit_stack_create(FR_PCRE_JIT_STACK_MIN, FR_PCRE_JIT_STACK_MAX, tls->gcontext);
184 if (!tls->jit_stack) {
185 fr_strerror_const("Failed allocating JIT stack");
186 goto error;
187 }
188 pcre2_jit_stack_assign(tls->mcontext, NULL, tls->jit_stack);
189 }
190#endif
191
192 /*
193 * Free on thread exit
194 */
195 fr_atexit_thread_local(fr_pcre2_tls, _pcre2_tls_free_on_exit, tls);
196 fr_pcre2_tls = tls; /* Assign to thread local storage */
197
198 return 0;
199}
200
201/** Free regex_t structure
202 *
203 * Calls libpcre specific free functions for the expression and study.
204 *
205 * @param preg to free.
206 */
207static int _regex_free(regex_t *preg)
208{
209 if (preg->compiled) pcre2_code_free(preg->compiled);
210
211 return 0;
212}
213
214/** Wrapper around pcre2_compile
215 *
216 * Allows the rest of the code to do compilations using one function signature.
217 *
218 * @note Compiled expression must be freed with talloc_free.
219 *
220 * @param[out] out Where to write out a pointer to the structure containing
221 * the compiled expression.
222 * @param[in] pattern to compile.
223 * @param[in] len of pattern.
224 * @param[in] flags controlling matching. May be NULL.
225 * @param[in] subcaptures Whether to compile the regular expression to store subcapture
226 * data.
227 * @param[in] runtime If false run the pattern through the PCRE JIT (if available)
228 * to convert it to machine code. This trades startup time (longer)
229 * for runtime performance (better).
230 * @return
231 * - >= 1 on success.
232 * - <= 0 on error. Negative value is offset of parse error.
233 */
234ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
235 fr_regex_flags_t const *flags, bool subcaptures, bool runtime)
236{
237 int ret;
238 PCRE2_SIZE offset;
239 uint32_t cflags = 0;
240 regex_t *preg;
241
242 /*
243 * Check inputs
244 */
245 *out = NULL;
246
247 /*
248 * Thread local initialisation
249 */
250 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
251
252 if (len == 0) {
253 fr_strerror_const("Empty expression");
254 return 0;
255 }
256
257 /*
258 * Options
259 */
260 if (flags) {
261 /* flags->global implemented by substitution function */
262 if (flags->ignore_case) cflags |= PCRE2_CASELESS;
263 if (flags->multiline) cflags |= PCRE2_MULTILINE;
264 if (flags->dot_all) cflags |= PCRE2_DOTALL;
265 if (flags->unicode) cflags |= PCRE2_UTF;
266 if (flags->extended) cflags |= PCRE2_EXTENDED;
267 }
268
269 if (!subcaptures) cflags |= PCRE2_NO_AUTO_CAPTURE;
270
271 preg = talloc_zero(ctx, regex_t);
272 talloc_set_destructor(preg, _regex_free);
273
274 preg->compiled = pcre2_compile((PCRE2_SPTR8)pattern, len,
275 cflags, &ret, &offset, fr_pcre2_tls->ccontext);
276 if (!preg->compiled) {
277 PCRE2_UCHAR errbuff[128];
278
279 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
280 fr_strerror_printf("%s", (char *)errbuff);
281 talloc_free(preg);
282
283 return -(ssize_t)offset;
284 }
285
286 if (!runtime) {
287 preg->precompiled = true;
288
289#ifdef PCRE2_CONFIG_JIT
290 /*
291 * This is expensive, so only do it for
292 * expressions that are going to be
293 * evaluated repeatedly.
294 */
295 if (fr_pcre2_tls->do_jit) {
296 ret = pcre2_jit_compile(preg->compiled, PCRE2_JIT_COMPLETE);
297 if (ret < 0) {
298 PCRE2_UCHAR errbuff[128];
299
300 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
301 fr_strerror_printf("Pattern JIT failed: %s", (char *)errbuff);
302 talloc_free(preg);
303
304 return 0;
305 }
306 preg->jitd = true;
307 }
308#endif
309 }
310
311 *out = preg;
312
313 return len;
314}
315
316/** Wrapper around pcre2_exec
317 *
318 * @param[in] preg The compiled expression.
319 * @param[in] subject to match.
320 * @param[in] len Length of subject.
321 * @param[in] regmatch Array of match pointers.
322 * @return
323 * - -1 on failure.
324 * - 0 on no match.
325 * - 1 on match.
326 */
327int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
328{
329 int ret;
330 uint32_t options = 0;
331
332 char *our_subject = NULL;
333 bool dup_subject = true;
334 pcre2_match_data *match_data;
335
336 /*
337 * Thread local initialisation
338 */
339 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
340
341 if (regmatch) {
342#ifdef PCRE2_COPY_MATCHED_SUBJECT
343 /*
344 * This is apparently only supported for pcre2_match
345 * NOT pcre2_jit_match.
346 */
347# ifdef PCRE2_CONFIG_JIT
348 if (!preg->jitd) {
349# endif
350 dup_subject = false;
351
352 /*
353 * If PCRE2_COPY_MATCHED_SUBJECT is available
354 * and set as an options flag, pcre2_match will
355 * strdup the subject string if pcre2_match is
356 * successful and store a pointer to it in the
357 * regmatch struct.
358 *
359 * The lifetime of the string memory will be
360 * bound to the regmatch struct. This is more
361 * efficient that doing it ourselves, as the
362 * strdup only occurs if the subject matches.
363 */
364 options |= PCRE2_COPY_MATCHED_SUBJECT;
365# ifdef PCRE2_CONFIG_JIT
366 }
367# endif
368#endif
369 if (dup_subject) {
370 /*
371 * We have to dup and operate on the duplicate
372 * of the subject, because pcre2_jit_match and
373 * pcre2_match store a pointer to the subject
374 * in the regmatch structure.
375 */
376 subject = our_subject = talloc_bstrndup(regmatch, subject, len);
377 if (!subject) {
378 fr_strerror_const("Out of memory");
379 return -1;
380 }
381#ifndef NDEBUG
382 regmatch->subject = subject; /* Stored only for tracking memory issues */
383#endif
384 }
385 }
386
387 /*
388 * If we weren't given match data we
389 * need to alloc it else pcre2_match
390 * fails when passed NULL match data.
391 */
392 if (!regmatch) {
393 match_data = pcre2_match_data_create_from_pattern(preg->compiled, fr_pcre2_tls->gcontext);
394 if (!match_data) {
395 fr_strerror_const("Failed allocating temporary match data");
396 return -1;
397 }
398 } else {
399 match_data = regmatch->match_data;
400 }
401
402#ifdef PCRE2_CONFIG_JIT
403 if (preg->jitd) {
404 ret = pcre2_jit_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
405 match_data, fr_pcre2_tls->mcontext);
406 } else
407#endif
408 {
409 ret = pcre2_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
410 match_data, fr_pcre2_tls->mcontext);
411 }
412 if (!regmatch) pcre2_match_data_free(match_data);
413 if (ret < 0) {
414 PCRE2_UCHAR errbuff[128];
415
416 if (dup_subject) talloc_free(our_subject);
417
418 if (ret == PCRE2_ERROR_NOMATCH) {
419 if (regmatch) regmatch->used = 0;
420 return 0;
421 }
422
423 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
424 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
425
426 return -1;
427 }
428
429 if (regmatch) regmatch->used = ret;
430
431 return 1;
432}
433
434/** Wrapper around pcre2_substitute
435 *
436 * @param[in] ctx to allocate output string in.
437 * @param[out] out Output string with replacements performed.
438 * @param[in] max_out Maximum length of output buffer. If this is 0 then
439 * the output length is unlimited.
440 * @param[in] preg The compiled expression.
441 * @param[in] flags that affect matching.
442 * @param[in] subject to perform replacements on.
443 * @param[in] subject_len the length of the subject.
444 * @param[in] replacement replacement string containing substitution
445 * markers.
446 * @param[in] replacement_len Length of the replacement string.
447 * @param[in] regmatch Array of match pointers.
448 * @return
449 * - >= 0 the length of the output string.
450 * - < 0 on error.
451 */
452int regex_substitute(TALLOC_CTX *ctx, char **out, size_t max_out, regex_t *preg, fr_regex_flags_t const *flags,
453 char const *subject, size_t subject_len,
454 char const *replacement, size_t replacement_len,
455 fr_regmatch_t *regmatch)
456{
457 int ret;
458 uint32_t options = 0;
459 size_t buff_len, actual_len;
460 char *buff;
461
462#ifndef PCRE2_COPY_MATCHED_SUBJECT
463 char *our_subject = NULL;
464#endif
465
466 /*
467 * Thread local initialisation
468 */
469 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
470
471 /*
472 * Internally pcre2_substitute just calls pcre2_match to
473 * generate the match data, so the same hack as the
474 * regex_exec function above is required.
475 */
476 if (regmatch) {
477#ifndef PCRE2_COPY_MATCHED_SUBJECT
478 /*
479 * We have to dup and operate on the duplicate
480 * of the subject, because pcre2_jit_match and
481 * pcre2_match store a pointer to the subject
482 * in the regmatch structure.
483 */
484 subject = our_subject = talloc_bstrndup(regmatch, subject, subject_len);
485 if (!subject) {
486 fr_strerror_const("Out of memory");
487 return -1;
488 }
489#else
490 /*
491 * If PCRE2_COPY_MATCHED_SUBJECT is available
492 * and set as an options flag, pcre2_match will
493 * strdup the subject string if pcre2_match is
494 * successful and store a pointer to it in the
495 * regmatch struct.
496 *
497 * The lifetime of the string memory will be
498 * bound to the regmatch struct. This is more
499 * efficient that doing it ourselves, as the
500 * strdup only occurs if the subject matches.
501 */
502 options |= PCRE2_COPY_MATCHED_SUBJECT;
503#endif
504 }
505
506 /*
507 * Guess (badly) what the length of the output buffer should be
508 */
509 actual_len = buff_len = subject_len + 1; /* +1 for the \0 */
510 buff = talloc_array(ctx, char, buff_len);
511 if (!buff) {
512#ifndef PCRE2_COPY_MATCHED_SUBJECT
513 talloc_free(our_subject);
514#endif
515 fr_strerror_const("Out of memory");
516 return -1;
517 }
518
519 options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
520 if (flags->global) options |= PCRE2_SUBSTITUTE_GLOBAL;
521
522again:
523 /*
524 * actual_len input value should be the size of the
525 * buffer including space for '\0'.
526 * If input buffer is too small, then actual_len will be set
527 * to the buffer space needed including space for '\0'.
528 * If input buffer is the correct size, then actual_len
529 * will be set to the size of the string written to buff
530 * without the terminating '\0'.
531 */
532 ret = pcre2_substitute(preg->compiled,
533 (PCRE2_SPTR8)subject, (PCRE2_SIZE)subject_len, 0,
534 options, NULL, fr_pcre2_tls->mcontext,
535 (PCRE2_UCHAR const *)replacement, replacement_len, (PCRE2_UCHAR *)buff, &actual_len);
536
537 if (ret < 0) {
538 PCRE2_UCHAR errbuff[128];
539
540#ifndef PCRE2_COPY_MATCHED_SUBJECT
541 talloc_free(our_subject);
542#endif
544
545 if (ret == PCRE2_ERROR_NOMEMORY) {
546 if ((max_out > 0) && (actual_len > max_out)) {
547 fr_strerror_printf("String length with substitutions (%zu) "
548 "exceeds max string length (%zu)", actual_len - 1, max_out - 1);
549 return -1;
550 }
551
552 /*
553 * Check that actual_len != buff_len as that'd be
554 * an actual error.
555 */
556 if (actual_len == buff_len) {
557 fr_strerror_const("libpcre2 out of memory");
558 return -1;
559 }
560 buff_len = actual_len; /* The length we get passed back includes the \0 */
561 buff = talloc_array(ctx, char, buff_len);
562 if (!buff) {
563 fr_strerror_const("Out of memory");
564 return -1;
565 }
566 goto again;
567 }
568
569 if (ret == PCRE2_ERROR_NOMATCH) {
570 if (regmatch) regmatch->used = 0;
571 return 0;
572 }
573
574 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
575 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
576 return -1;
577 }
578
579 /*
580 * Trim the replacement buffer to the correct length
581 *
582 * buff_len includes \0.
583 * ...and as pcre2_substitute just succeeded actual_len does not include \0.
584 */
585 if (actual_len < (buff_len - 1)) {
586 buff = talloc_bstr_realloc(ctx, buff, actual_len);
587 if (!buff) {
588 fr_strerror_const("reallocing pcre2_substitute result buffer failed");
589 return -1;
590 }
591 }
592
593 if (regmatch) regmatch->used = ret;
594 *out = buff;
595
596 return 1;
597}
598
599
600/** Returns the number of subcapture groups
601 *
602 * @return
603 * - >0 The number of subcaptures contained within the pattern
604 * - 0 if the number of subcaptures can't be determined.
605 */
606uint32_t regex_subcapture_count(regex_t const *preg)
607{
609
610 if (pcre2_pattern_info(preg->compiled, PCRE2_INFO_CAPTURECOUNT, &count) != 0) {
611 fr_strerror_const("Error determining subcapture group count");
612 return 0;
613 }
614
615 return count + 1;
616}
617
618/** Free libpcre2's matchdata
619 *
620 * @note Don't call directly, will be called if talloc_free is called on a #regmatch_t.
621 */
622static int _pcre2_match_data_free(fr_regmatch_t *regmatch)
623{
624 pcre2_match_data_free(regmatch->match_data);
625 return 0;
626}
627
628/** Allocate vectors to fill with match data
629 *
630 * @param[in] ctx to allocate match vectors in.
631 * @param[in] count The number of vectors to allocate.
632 * @return
633 * - NULL on error.
634 * - Array of match vectors.
635 */
636fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
637{
638 fr_regmatch_t *regmatch;
639
640 /*
641 * Thread local initialisation
642 */
643 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return NULL;
644
645 regmatch = talloc(ctx, fr_regmatch_t);
646 if (!regmatch) {
647 oom:
648 fr_strerror_const("Out of memory");
649 return NULL;
650 }
651
652 regmatch->match_data = pcre2_match_data_create(count, fr_pcre2_tls->gcontext);
653 if (!regmatch->match_data) {
654 talloc_free(regmatch);
655 goto oom;
656 }
657 talloc_set_type(regmatch->match_data, pcre2_match_data);
658
659 talloc_set_destructor(regmatch, _pcre2_match_data_free);
660
661 return regmatch;
662}
663
664/*
665 *######################################
666 *# FUNCTIONS FOR POSIX-REGEX #
667 *######################################
668 */
669# else
670/*
671 * Wrapper functions for POSIX like, and extended regular
672 * expressions. These use the system regex library.
673 */
674
/** Talloc destructor for a POSIX regex_t
 *
 * The regex_t is heap allocated so that regex_compile has the same
 * signature whether POSIX regex or libpcre is in use.
 *
 * @param[in] preg	being freed.
 * @return always 0.
 */
static int _regex_free(regex_t *preg)
{
	regex_t *compiled = preg;

	regfree(compiled);

	return 0;
}
688
689/** Binary safe wrapper around regcomp
690 *
691 * If we have the BSD extensions we don't need to do any special work
692 * if we don't have the BSD extensions we need to check to see if the
693 * regular expression contains any \0 bytes.
694 *
695 * If it does we fail and print the appropriate error message.
696 *
697 * @note Compiled expression must be freed with talloc_free.
698 *
699 * @param[in] ctx To allocate memory in.
700 * @param[out] out Where to write out a pointer
701 * to the structure containing the compiled expression.
702 * @param[in] pattern to compile.
703 * @param[in] len of pattern.
704 * @param[in] flags controlling matching. May be NULL.
705 * @param[in] subcaptures Whether to compile the regular expression
706 * to store subcapture data.
707 * @param[in] runtime Whether the compilation is being done at runtime.
708 * @return
709 * - >= 1 on success.
710 * - <= 0 on error. Negative value is offset of parse error.
711 * With POSIX regex we only give the correct offset for embedded \0 errors.
712 */
713ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
714 fr_regex_flags_t const *flags, bool subcaptures, UNUSED bool runtime)
715{
716 int ret;
717 int cflags = REG_EXTENDED;
718 regex_t *preg;
719
720 if (len == 0) {
721 fr_strerror_const("Empty expression");
722 return 0;
723 }
724
725 /*
726 * Options
727 */
728 if (flags) {
729 if (flags->global) {
730 fr_strerror_const("g - Global matching/substitution not supported with posix-regex");
731 return 0;
732 }
733 if (flags->dot_all) {
734 fr_strerror_const("s - Single line matching is not supported with posix-regex");
735 return 0;
736 }
737 if (flags->unicode) {
738 fr_strerror_const("u - Unicode matching not supported with posix-regex");
739 return 0;
740 }
741 if (flags->extended) {
742 fr_strerror_const("x - Whitespace and comments not supported with posix-regex");
743 return 0;
744 }
745
746 if (flags->ignore_case) cflags |= REG_ICASE;
747 if (flags->multiline) cflags |= REG_NEWLINE;
748 }
749
750
751 if (!subcaptures) cflags |= REG_NOSUB;
752
753#ifndef HAVE_REGNCOMP
754 {
755 char const *p;
756
757 p = pattern;
758 p += strlen(pattern);
759
760 if ((size_t)(p - pattern) != len) {
761 fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation",
762 (p - pattern));
763 return -(p - pattern);
764 }
765
766 preg = talloc_zero(ctx, regex_t);
767 if (!preg) return 0;
768
769 ret = regcomp(preg, pattern, cflags);
770 }
771#else
772 preg = talloc_zero(ctx, regex_t);
773 if (!preg) return 0;
774 ret = regncomp(preg, pattern, len, cflags);
775#endif
776 if (ret != 0) {
777 char errbuf[128];
778
779 regerror(ret, preg, errbuf, sizeof(errbuf));
780 fr_strerror_printf("%s", errbuf);
781
782 talloc_free(preg);
783
784 return 0; /* POSIX expressions don't give us the failure offset */
785 }
786
787 talloc_set_destructor(preg, _regex_free);
788 *out = preg;
789
790 return len;
791}
792
793/** Binary safe wrapper around regexec
794 *
795 * If we have the BSD extensions we don't need to do any special work
796 * If we don't have the BSD extensions we need to check to see if the
797 * value to be compared contains any \0 bytes.
798 *
799 * If it does, we fail and print the appropriate error message.
800 *
801 * @param[in] preg The compiled expression.
802 * @param[in] subject to match.
803 * @param[in] regmatch Match result structure.
804 * @return
805 * - -1 on failure.
806 * - 0 on no match.
807 * - 1 on match.
808 */
809int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
810{
811 int ret;
812 size_t matches;
813
814 /*
815 * Disable capturing
816 */
817 if (!regmatch) {
818 matches = 0;
819 } else {
820 matches = regmatch->allocd;
821
822 /*
823 * Reset the match result structure
824 */
825 memset(regmatch->match_data, 0, sizeof(regmatch->match_data[0]) * matches);
826 regmatch->used = 0;
827 }
828
829#ifndef HAVE_REGNEXEC
830 {
831 char const *p;
832
833 p = subject;
834 p += strlen(subject);
835
836 if ((size_t)(p - subject) != len) {
837 fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation",
838 (p - subject));
839 if (regmatch) regmatch->used = 0;
840 return -1;
841 }
842 ret = regexec(preg, subject, matches, regmatch ? regmatch->match_data : NULL, 0);
843 }
844#else
845 ret = regnexec(preg, subject, len, matches, regmatch ? regmatch->match_data : NULL, 0);
846#endif
847 if (ret != 0) {
848 if (ret != REG_NOMATCH) {
849 char errbuf[128];
850
851 regerror(ret, preg, errbuf, sizeof(errbuf));
852
853 fr_strerror_printf("regex evaluation failed: %s", errbuf);
854 return -1;
855 }
856 return 0;
857 }
858
859 /*
860 * Update regmatch->count to be the maximum number of
861 * groups that *could* have been populated as we don't
862 * have the number of matches.
863 */
864 if (regmatch) {
865 regmatch->used = preg->re_nsub + 1;
866
867 if (regmatch->subject) talloc_const_free(regmatch->subject);
868 regmatch->subject = talloc_bstrndup(regmatch, subject, len);
869 if (!regmatch->subject) {
870 fr_strerror_const("Out of memory");
871 return -1;
872 }
873 }
874 return 1;
875}
876
877/** Returns the number of subcapture groups
878 *
879 * @return
880 * - 0 we can't determine this for POSIX regular expressions.
881 */
882uint32_t regex_subcapture_count(UNUSED regex_t const *preg)
883{
884 return 0;
885}
886# endif
887
888# if defined(HAVE_REGEX_POSIX)
889/** Allocate vectors to fill with match data
890 *
891 * @param[in] ctx to allocate match vectors in.
892 * @param[in] count The number of vectors to allocate.
893 * @return
894 * - NULL on error.
895 * - Array of match vectors.
896 */
897fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
898{
899 fr_regmatch_t *regmatch;
900
901 /*
902 * Pre-allocate space for the match structure
903 * and for a 128b subject string.
904 */
905 regmatch = talloc_zero_pooled_object(ctx, fr_regmatch_t, 2, (sizeof(regmatch_t) * count) + 128);
906 if (unlikely(!regmatch)) {
907 error:
908 fr_strerror_const("Out of memory");
909 talloc_free(regmatch);
910 return NULL;
911 }
912 regmatch->match_data = talloc_array(regmatch, regmatch_t, count);
913 if (unlikely(!regmatch->match_data)) goto error;
914
915 regmatch->allocd = count;
916 regmatch->used = 0;
917 regmatch->subject = NULL;
918
919 return regmatch;
920}
921# endif
922
923/*
924 *########################################
925 *# UNIVERSAL FUNCTIONS #
926 *########################################
927 */
928
929/** Parse a string containing one or more regex flags
930 *
931 * @param[out] err May be NULL. If not NULL will be set to:
932 * - 0 on success.
933 * - -1 on unknown flag.
934 * - -2 on duplicate.
935 * @param[out] out Flag structure to populate. Must be initialised to zero
936 * if this is the first call to regex_flags_parse.
937 * @param[in] in Flag string to parse.
938 * @param[in] terminals Terminal characters. If parsing ends before the buffer
939 * is exhausted, and is pointing to one of these chars
940 * it's not considered an error.
941 * @param[in] err_on_dup Error if the flag is already set.
942 * @return
943 * - > 0 on success. The number of flag bytes parsed.
944 * - <= 0 on failure. Negative offset of first unrecognised flag.
945 */
946fr_slen_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in,
947 fr_sbuff_term_t const *terminals, bool err_on_dup)
948{
949 fr_sbuff_t our_in = FR_SBUFF(in);
950
951 if (err) *err = 0;
952
953 while (fr_sbuff_extend(&our_in)) {
954 switch (*our_in.p) {
955#define DO_REGEX_FLAG(_f, _c) \
956 case _c: \
957 if (err_on_dup && out->_f) { \
958 fr_strerror_printf("Duplicate regex flag '%c'", *our_in.p); \
959 if (err) *err = -2; \
960 FR_SBUFF_ERROR_RETURN(&our_in); \
961 } \
962 out->_f = 1; \
963 break
964
965 DO_REGEX_FLAG(global, 'g');
966 DO_REGEX_FLAG(ignore_case, 'i');
967 DO_REGEX_FLAG(multiline, 'm');
968 DO_REGEX_FLAG(dot_all, 's');
969 DO_REGEX_FLAG(unicode, 'u');
970 DO_REGEX_FLAG(extended, 'x');
971#undef DO_REGEX_FLAG
972
973 default:
974 if (fr_sbuff_is_terminal(&our_in, terminals)) FR_SBUFF_SET_RETURN(in, &our_in);
975
976 fr_strerror_printf("Unsupported regex flag '%c'", *our_in.p);
977 if (err) *err = -1;
978 FR_SBUFF_ERROR_RETURN(&our_in);
979 }
980 fr_sbuff_advance(&our_in, 1);
981 }
982 FR_SBUFF_SET_RETURN(in, &our_in);
983}
984
985/** Print the flags
986 *
987 * @param[out] sbuff where to write flags.
988 * @param[in] flags to print.
989 * @return
990 * - The number of bytes written to the out buffer.
991 * - A number >= outlen if truncation has occurred.
992 */
993ssize_t regex_flags_print(fr_sbuff_t *sbuff, fr_regex_flags_t const *flags)
994{
995 fr_sbuff_t our_sbuff = FR_SBUFF(sbuff);
996
997#define DO_REGEX_FLAG(_f, _c) \
998 if (flags->_f) FR_SBUFF_IN_CHAR_RETURN(&our_sbuff, _c)
999
1000 DO_REGEX_FLAG(global, 'g');
1001 DO_REGEX_FLAG(ignore_case, 'i');
1002 DO_REGEX_FLAG(multiline, 'm');
1003 DO_REGEX_FLAG(dot_all, 's');
1004 DO_REGEX_FLAG(unicode, 'u');
1005 DO_REGEX_FLAG(extended, 'x');
1006#undef DO_REGEX_FLAG
1007
1008 FR_SBUFF_SET_RETURN(sbuff, &our_sbuff);
1009}
1010#endif
1011
1012/** Compare two boxes using an operator
1013 *
1014 * @todo - allow /foo/i on the RHS
1015 *
1016 * However, this involves allocating intermediate sbuffs for the
1017 * unescaped RHS, and all kinds of extra work. It's not overly hard,
1018 * but it's something we wish to avoid for now.
1019 *
1020 * @param[in] op to use in comparison. MUST be T_OP_REG_EQ or T_OP_REG_NE
1021 * @param[in] a Value to compare, MUST be FR_TYPE_STRING
1022 * @param[in] b uncompiled regex as FR_TYPE_STRING
1023 * @return
1024 * - 1 if true
1025 * - 0 if false
1026 * - -1 on failure.
1027 */
1029{
1030 int rcode;
1031 TALLOC_CTX *ctx = NULL;
1032 size_t lhs_len;
1033 char const *lhs;
1034 regex_t *regex = NULL;
1035
1036 if (!((op == T_OP_REG_EQ) || (op == T_OP_REG_NE))) {
1037 fr_strerror_const("Invalid operator for regex comparison");
1038 return -1;
1039 }
1040
1041 if (b->type != FR_TYPE_STRING) {
1042 fr_strerror_const("RHS must be regular expression");
1043 return -1;
1044 }
1045
1046 ctx = talloc_init_const("regex_cmp_op");
1047 if (!ctx) return -1;
1048
1049 if ((a->type != FR_TYPE_STRING) && (a->type != FR_TYPE_OCTETS)) {
1050 fr_slen_t slen;
1051 char *p;
1052
1053 slen = fr_value_box_aprint(ctx, &p, a, NULL); /* no escaping */
1054 if (slen < 0) return slen;
1055
1056 lhs = p;
1057 lhs_len = slen;
1058
1059 } else {
1060 lhs = a->vb_strvalue;
1061 lhs_len = a->vb_length;
1062 }
1063
1064 if (regex_compile(ctx, &regex, b->vb_strvalue, b->vb_length, NULL, false, true) < 0) {
1065 talloc_free(ctx);
1066 return -1;
1067 }
1068
1069#ifdef STATIC_ANALYZER
1070 if (!regex) {
1071 talloc_free(ctx);
1072 return -1;
1073 }
1074#endif
1075
1076 rcode = regex_exec(regex, lhs, lhs_len, NULL);
1077 talloc_free(ctx);
1078 if (rcode < 0) return rcode;
1079
1080 /*
1081 * Invert the sense of the rcode for !~
1082 */
1083 if (op == T_OP_REG_NE) rcode = (rcode == 0);
1084
1085 return rcode;
1086}
#define fr_atexit_thread_local(_name, _free, _uctx)
Definition atexit.h:224
#define RCSID(id)
Definition build.h:512
#define unlikely(_x)
Definition build.h:407
#define UNUSED
Definition build.h:336
static fr_slen_t err
Definition dict.h:882
static fr_slen_t in
Definition dict.h:882
talloc_free(hp)
@ FR_TYPE_STRING
String of printable characters.
@ FR_TYPE_OCTETS
Raw octets.
unsigned int uint32_t
long int ssize_t
unsigned char uint8_t
ssize_t fr_slen_t
bool fr_sbuff_is_terminal(fr_sbuff_t *in, fr_sbuff_term_t const *tt)
Efficient terminal string search.
Definition sbuff.c:2197
#define fr_sbuff_extend(_sbuff_or_marker)
#define FR_SBUFF_ERROR_RETURN(_sbuff_or_marker)
#define FR_SBUFF_SET_RETURN(_dst, _src)
#define SBUFF_CHAR_UNPRINTABLES_EXTENDED
#define FR_SBUFF(_sbuff_or_marker)
#define fr_sbuff_advance(_sbuff_or_marker, _len)
#define SBUFF_CHAR_UNPRINTABLES_LOW
Set of terminal elements.
static char buff[sizeof("18446744073709551615")+3]
Definition size_tests.c:37
return count
Definition module.c:155
char * talloc_bstr_realloc(TALLOC_CTX *ctx, char *in, size_t inlen)
Trim a bstr (char) buffer.
Definition talloc.c:682
char * talloc_bstrndup(TALLOC_CTX *ctx, char const *in, size_t inlen)
Binary safe strndup function.
Definition talloc.c:618
#define talloc_zero_pooled_object(_ctx, _type, _num_subobjects, _total_subobjects_size)
Definition talloc.h:208
static int talloc_const_free(void const *ptr)
Free const'd memory.
Definition talloc.h:254
static TALLOC_CTX * talloc_init_const(char const *name)
Allocate a top level chunk with a constant name.
Definition talloc.h:127
enum fr_token fr_token_t
@ T_OP_REG_EQ
Definition token.h:100
@ T_OP_REG_NE
Definition token.h:101
int fr_regex_cmp_op(fr_token_t op, fr_value_box_t const *a, fr_value_box_t const *b)
Compare two boxes using an operator.
Definition regex.c:1028
#define fr_strerror_printf(_fmt,...)
Log to thread local error buffer.
Definition strerror.h:64
#define fr_strerror_const(_msg)
Definition strerror.h:223
static fr_slen_t fr_value_box_aprint(TALLOC_CTX *ctx, char **out, fr_value_box_t const *data, fr_sbuff_escape_rules_t const *e_rules) 1(fr_value_box_print
static size_t char ** out
Definition value.h:1030