The FreeRADIUS server $Id: 15bac2a4c627c01d1aa2047687b3418955ac7f00 $
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15 */
16
17/** Wrappers around various regular expression libraries
18 *
19 * @file src/lib/util/regex.c
20 *
21 * @copyright 2014 The FreeRADIUS server project
22 * @copyright 2014 Arran Cudbard-Bell (a.cudbardb@freeradius.org)
23 */
24RCSID("$Id: da5e43f7b0dc78ce88065e87045c8e3ec6d6f523 $")
25
26#ifdef HAVE_REGEX
27
28#include <freeradius-devel/util/regex.h>
29
30#if defined(HAVE_REGEX_PCRE2) && defined(PCRE2_CONFIG_JIT)
31#ifndef FR_PCRE_JIT_STACK_MIN
32# define FR_PCRE_JIT_STACK_MIN (128 * 1024)
33#endif
34#ifndef FR_PCRE_JIT_STACK_MAX
35# define FR_PCRE_JIT_STACK_MAX (512 * 1024)
36#endif
37#endif
38
39const fr_sbuff_escape_rules_t regex_escape_rules = {
40 .name = "regex",
41 .chr = '\\',
42 .subs = {
43 ['$'] = '$',
44 ['('] = '(',
45 ['*'] = '*',
46 ['+'] = '+',
47 ['.'] = '.',
48 ['/'] = '/',
49 ['?'] = '?',
50 ['['] = '[',
51 ['\\'] = '\\',
52 ['^'] = '^',
53 ['`'] = '`',
54 ['|'] = '|',
55 ['\a'] = 'a',
56 ['\b'] = 'b',
57 ['\n'] = 'n',
58 ['\r'] = 'r',
59 ['\t'] = 't',
60 ['\v'] = 'v'
61 },
62 .esc = {
65 },
66 .do_utf8 = true,
67 .do_oct = true
68};
69
70
71/*
72 *######################################
73 *# FUNCTIONS FOR LIBPCRE2 #
74 *######################################
75 */
76#ifdef HAVE_REGEX_PCRE2
77/*
78 * Wrapper functions for libpcre2. Much more powerful, and guaranteed
79 * to be binary safe for both patterns and subjects but require
80 * libpcre2.
81 */
82
/** Thread local storage for PCRE2
 *
 * Not all this storage is thread local, but it simplifies cleanup if
 * we bind its lifetime to the thread, and lets us get away with not
 * having specific init/free functions.
 *
 * One instance is created per thread by fr_pcre2_tls_init(), and the
 * contexts it holds are released by _pcre2_tls_free().
 */
typedef struct {
	TALLOC_CTX		*alloc_ctx;	//!< Context used for any allocations.
						///< Used as the talloc parent by _pcre2_talloc().
	pcre2_general_context	*gcontext;	//!< General context.
	pcre2_compile_context	*ccontext;	//!< Compile context.
	pcre2_match_context	*mcontext;	//!< Match context.
#ifdef PCRE2_CONFIG_JIT
	pcre2_jit_stack		*jit_stack;	//!< Jit stack for executing jit'd patterns.
	bool			do_jit;		//!< Whether we have runtime JIT support.
#endif
} fr_pcre2_tls_t;
99
/** Thread local storage for pcre2
 *
 * NULL until fr_pcre2_tls_init() has run successfully in the calling thread.
 * Every public PCRE2 wrapper in this file checks it and initialises it lazily.
 */
static _Thread_local fr_pcre2_tls_t *fr_pcre2_tls;
104
105/** Talloc wrapper for pcre2 memory allocation
106 *
107 * @param[in] to_alloc How many bytes to alloc.
108 * @param[in] uctx UNUSED.
109 */
110static void *_pcre2_talloc(PCRE2_SIZE to_alloc, UNUSED void *uctx)
111{
112 return talloc_array(fr_pcre2_tls->alloc_ctx, uint8_t, to_alloc);
113}
114
115/** Talloc wrapper for pcre2 memory freeing
116 *
117 * @param[in] to_free Memory to free.
118 * @param[in] uctx UNUSED.
119 */
120static void _pcre2_talloc_free(void *to_free, UNUSED void *uctx)
121{
122 talloc_free(to_free);
123}
124
125/** Free thread local data
126 *
127 * @param[in] tls Thread local data to free.
128 */
129static int _pcre2_tls_free(fr_pcre2_tls_t *tls)
130{
131 if (tls->gcontext) pcre2_general_context_free(tls->gcontext);
132 if (tls->ccontext) pcre2_compile_context_free(tls->ccontext);
133 if (tls->mcontext) pcre2_match_context_free(tls->mcontext);
134#ifdef PCRE2_CONFIG_JIT
135 if (tls->jit_stack) pcre2_jit_stack_free(tls->jit_stack);
136#endif
137
138 return 0;
139}
140
/** Thread exit handler, frees the per-thread PCRE2 state
 *
 * @param[in] arg	The fr_pcre2_tls_t registered by fr_pcre2_tls_init().
 * @return whatever talloc_free() returns.
 */
static int _pcre2_tls_free_on_exit(void *arg)
{
	int ret;

	ret = talloc_free(arg);

	return ret;
}
145
146/** Thread local init for pcre2
147 *
148 */
149static int fr_pcre2_tls_init(void)
150{
151 fr_pcre2_tls_t *tls;
152
153 if (unlikely(fr_pcre2_tls != NULL)) return 0;
154
155 fr_pcre2_tls = tls = talloc_zero(NULL, fr_pcre2_tls_t);
156 if (!tls) return -1;
157 talloc_set_destructor(tls, _pcre2_tls_free);
158
159 tls->gcontext = pcre2_general_context_create(_pcre2_talloc, _pcre2_talloc_free, NULL);
160 if (!tls->gcontext) {
161 fr_strerror_const("Failed allocating general context");
162 return -1;
163 }
164
165 tls->ccontext = pcre2_compile_context_create(tls->gcontext);
166 if (!tls->ccontext) {
167 fr_strerror_const("Failed allocating compile context");
168 error:
169 fr_pcre2_tls = NULL;
170 _pcre2_tls_free(tls);
171 return -1;
172 }
173
174 tls->mcontext = pcre2_match_context_create(tls->gcontext);
175 if (!tls->mcontext) {
176 fr_strerror_const("Failed allocating match context");
177 goto error;
178 }
179
180#ifdef PCRE2_CONFIG_JIT
181 pcre2_config(PCRE2_CONFIG_JIT, &tls->do_jit);
182 if (tls->do_jit) {
183 tls->jit_stack = pcre2_jit_stack_create(FR_PCRE_JIT_STACK_MIN, FR_PCRE_JIT_STACK_MAX, tls->gcontext);
184 if (!tls->jit_stack) {
185 fr_strerror_const("Failed allocating JIT stack");
186 goto error;
187 }
188 pcre2_jit_stack_assign(tls->mcontext, NULL, tls->jit_stack);
189 }
190#endif
191
192 /*
193 * Free on thread exit
194 */
195 fr_atexit_thread_local(fr_pcre2_tls, _pcre2_tls_free_on_exit, tls);
196 fr_pcre2_tls = tls; /* Assign to thread local storage */
197
198 return 0;
199}
200
201/** Free regex_t structure
202 *
203 * Calls libpcre specific free functions for the expression and study.
204 *
205 * @param preg to free.
206 */
207static int _regex_free(regex_t *preg)
208{
209 if (preg->compiled) pcre2_code_free(preg->compiled);
210
211 return 0;
212}
213
214/** Wrapper around pcre2_compile
215 *
216 * Allows the rest of the code to do compilations using one function signature.
217 *
218 * @note Compiled expression must be freed with talloc_free.
219 *
220 * @param[out] out Where to write out a pointer to the structure containing
221 * the compiled expression.
222 * @param[in] pattern to compile.
223 * @param[in] len of pattern.
224 * @param[in] flags controlling matching. May be NULL.
225 * @param[in] subcaptures Whether to compile the regular expression to store subcapture
226 * data.
227 * @param[in] runtime If false run the pattern through the PCRE JIT (if available)
228 * to convert it to machine code. This trades startup time (longer)
229 * for runtime performance (better).
230 * @return
231 * - >= 1 on success.
232 * - <= 0 on error. Negative value is offset of parse error.
233 */
234ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
235 fr_regex_flags_t const *flags, bool subcaptures, bool runtime)
236{
237 int ret;
238 PCRE2_SIZE offset;
239 uint32_t cflags = 0;
240 regex_t *preg;
241
242 /*
243 * Check inputs
244 */
245 *out = NULL;
246
247 /*
248 * Thread local initialisation
249 */
250 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
251
252 if (len == 0) {
253 fr_strerror_const("Empty expression");
254 return 0;
255 }
256
257 /*
258 * Options
259 */
260 if (flags) {
261 /* flags->global implemented by substitution function */
262 if (flags->ignore_case) cflags |= PCRE2_CASELESS;
263 if (flags->multiline) cflags |= PCRE2_MULTILINE;
264 if (flags->dot_all) cflags |= PCRE2_DOTALL;
265 if (flags->unicode) cflags |= PCRE2_UTF;
266 if (flags->extended) cflags |= PCRE2_EXTENDED;
267 }
268
269 if (!subcaptures) cflags |= PCRE2_NO_AUTO_CAPTURE;
270
271 preg = talloc_zero(ctx, regex_t);
272 talloc_set_destructor(preg, _regex_free);
273
274 preg->compiled = pcre2_compile((PCRE2_SPTR8)pattern, len,
275 cflags, &ret, &offset, fr_pcre2_tls->ccontext);
276 if (!preg->compiled) {
277 PCRE2_UCHAR errbuff[128];
278
279 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
280 fr_strerror_printf("%s", (char *)errbuff);
281 talloc_free(preg);
282
283 return -(ssize_t)offset;
284 }
285
286 if (!runtime) {
287 preg->precompiled = true;
288
289#ifdef PCRE2_CONFIG_JIT
290 /*
291 * This is expensive, so only do it for
292 * expressions that are going to be
293 * evaluated repeatedly.
294 */
295 if (fr_pcre2_tls->do_jit) {
296 ret = pcre2_jit_compile(preg->compiled, PCRE2_JIT_COMPLETE);
297 if (ret < 0) {
298 PCRE2_UCHAR errbuff[128];
299
300 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
301 fr_strerror_printf("Pattern JIT failed: %s", (char *)errbuff);
302 talloc_free(preg);
303
304 return 0;
305 }
306 preg->jitd = true;
307 }
308#endif
309 }
310
311 *out = preg;
312
313 return len;
314}
315
316/** Wrapper around pcre2_exec
317 *
318 * @param[in] preg The compiled expression.
319 * @param[in] subject to match.
320 * @param[in] len Length of subject.
321 * @param[in] regmatch Array of match pointers.
322 * @return
323 * - -1 on failure.
324 * - 0 on no match.
325 * - 1 on match.
326 */
327int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
328{
329 int ret;
330 uint32_t options = 0;
331
332 char *our_subject = NULL;
333 bool dup_subject = true;
334 pcre2_match_data *match_data;
335
336 /*
337 * Thread local initialisation
338 */
339 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
340
341 if (regmatch) {
342#ifdef PCRE2_COPY_MATCHED_SUBJECT
343 /*
344 * This is apparently only supported for pcre2_match
345 * NOT pcre2_jit_match.
346 */
347# ifdef PCRE2_CONFIG_JIT
348 if (!preg->jitd) {
349# endif
350 dup_subject = false;
351
352 /*
353 * If PCRE2_COPY_MATCHED_SUBJECT is available
354 * and set as an options flag, pcre2_match will
355 * strdup the subject string if pcre2_match is
356 * successful and store a pointer to it in the
357 * regmatch struct.
358 *
359 * The lifetime of the string memory will be
360 * bound to the regmatch struct. This is more
361 * efficient that doing it ourselves, as the
362 * strdup only occurs if the subject matches.
363 */
364 options |= PCRE2_COPY_MATCHED_SUBJECT;
365# ifdef PCRE2_CONFIG_JIT
366 }
367# endif
368#endif
369 if (dup_subject) {
370 /*
371 * We have to dup and operate on the duplicate
372 * of the subject, because pcre2_jit_match and
373 * pcre2_match store a pointer to the subject
374 * in the regmatch structure.
375 */
376 subject = our_subject = talloc_bstrndup(regmatch, subject, len);
377 if (!subject) {
378 fr_strerror_const("Out of memory");
379 return -1;
380 }
381#ifndef NDEBUG
382 regmatch->subject = subject; /* Stored only for tracking memory issues */
383#endif
384 }
385 }
386
387 /*
388 * If we weren't given match data we
389 * need to alloc it else pcre2_match
390 * fails when passed NULL match data.
391 */
392 if (!regmatch) {
393 match_data = pcre2_match_data_create_from_pattern(preg->compiled, fr_pcre2_tls->gcontext);
394 if (!match_data) {
395 fr_strerror_const("Failed allocating temporary match data");
396 return -1;
397 }
398 } else {
399 match_data = regmatch->match_data;
400 }
401
402#ifdef PCRE2_CONFIG_JIT
403 if (preg->jitd) {
404 ret = pcre2_jit_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
405 match_data, fr_pcre2_tls->mcontext);
406 } else
407#endif
408 {
409 ret = pcre2_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
410 match_data, fr_pcre2_tls->mcontext);
411 }
412 if (!regmatch) pcre2_match_data_free(match_data);
413 if (ret < 0) {
414 PCRE2_UCHAR errbuff[128];
415
416 if (dup_subject) talloc_free(our_subject);
417
418 if (ret == PCRE2_ERROR_NOMATCH) {
419 if (regmatch) regmatch->used = 0;
420 return 0;
421 }
422
423 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
424 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
425
426 return -1;
427 }
428
429 if (regmatch) regmatch->used = ret;
430
431 return 1;
432}
433
434/** Wrapper around pcre2_substitute
435 *
436 * @param[in] ctx to allocate output string in.
437 * @param[out] out Output string with replacements performed.
438 * @param[in] max_out Maximum length of output buffer. If this is 0 then
439 * the output length is unlimited.
440 * @param[in] preg The compiled expression.
441 * @param[in] flags that affect matching.
442 * @param[in] subject to perform replacements on.
443 * @param[in] subject_len the length of the subject.
444 * @param[in] replacement replacement string containing substitution
445 * markers.
446 * @param[in] replacement_len Length of the replacement string.
447 * @param[in] regmatch Array of match pointers.
448 * @return
449 * - >= 0 the length of the output string.
450 * - < 0 on error.
451 */
452int regex_substitute(TALLOC_CTX *ctx, char **out, size_t max_out, regex_t *preg, fr_regex_flags_t const *flags,
453 char const *subject, size_t subject_len,
454 char const *replacement, size_t replacement_len,
455 fr_regmatch_t *regmatch)
456{
457 int ret;
458 uint32_t options = 0;
459 size_t buff_len, actual_len;
460 char *buff;
461
462#ifndef PCRE2_COPY_MATCHED_SUBJECT
463 char *our_subject = NULL;
464#endif
465
466 /*
467 * Thread local initialisation
468 */
469 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
470
471 /*
472 * Internally pcre2_substitute just calls pcre2_match to
473 * generate the match data, so the same hack as the
474 * regex_exec function above is required.
475 */
476 if (regmatch) {
477#ifndef PCRE2_COPY_MATCHED_SUBJECT
478 /*
479 * We have to dup and operate on the duplicate
480 * of the subject, because pcre2_jit_match and
481 * pcre2_match store a pointer to the subject
482 * in the regmatch structure.
483 */
484 subject = our_subject = talloc_bstrndup(regmatch, subject, subject_len);
485 if (!subject) {
486 fr_strerror_const("Out of memory");
487 return -1;
488 }
489#else
490 /*
491 * If PCRE2_COPY_MATCHED_SUBJECT is available
492 * and set as an options flag, pcre2_match will
493 * strdup the subject string if pcre2_match is
494 * successful and store a pointer to it in the
495 * regmatch struct.
496 *
497 * The lifetime of the string memory will be
498 * bound to the regmatch struct. This is more
499 * efficient that doing it ourselves, as the
500 * strdup only occurs if the subject matches.
501 */
502 options |= PCRE2_COPY_MATCHED_SUBJECT;
503#endif
504 }
505
506 /*
507 * Guess (badly) what the length of the output buffer should be
508 */
509 actual_len = buff_len = subject_len + 1; /* +1 for the \0 */
510 buff = talloc_array(ctx, char, buff_len);
511 if (!buff) {
512#ifndef PCRE2_COPY_MATCHED_SUBJECT
513 talloc_free(our_subject);
514#endif
515 fr_strerror_const("Out of memory");
516 return -1;
517 }
518
519 options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
520 if (flags->global) options |= PCRE2_SUBSTITUTE_GLOBAL;
521
522again:
523 /*
524 * actual_len input value should be the size of the
525 * buffer including space for '\0'.
526 * If input buffer is too small, then actual_len will be set
527 * to the buffer space needed including space for '\0'.
528 * If input buffer is the correct size, then actual_len
529 * will be set to the size of the string written to buff
530 * without the terminating '\0'.
531 */
532 ret = pcre2_substitute(preg->compiled,
533 (PCRE2_SPTR8)subject, (PCRE2_SIZE)subject_len, 0,
534 options, NULL, fr_pcre2_tls->mcontext,
535 (PCRE2_UCHAR const *)replacement, replacement_len, (PCRE2_UCHAR *)buff, &actual_len);
536
537 if (ret < 0) {
538 PCRE2_UCHAR errbuff[128];
539
540#ifndef PCRE2_COPY_MATCHED_SUBJECT
541 talloc_free(our_subject);
542#endif
544
545 if (ret == PCRE2_ERROR_NOMEMORY) {
546 if ((max_out > 0) && (actual_len > max_out)) {
547 fr_strerror_printf("String length with substitutions (%zu) "
548 "exceeds max string length (%zu)", actual_len - 1, max_out - 1);
549 return -1;
550 }
551
552 /*
553 * Check that actual_len != buff_len as that'd be
554 * an actual error.
555 */
556 if (actual_len == buff_len) {
557 fr_strerror_const("libpcre2 out of memory");
558 return -1;
559 }
560 buff_len = actual_len; /* The length we get passed back includes the \0 */
561 buff = talloc_array(ctx, char, buff_len);
562 goto again;
563 }
564
565 if (ret == PCRE2_ERROR_NOMATCH) {
566 if (regmatch) regmatch->used = 0;
567 return 0;
568 }
569
570 pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
571 fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
572 return -1;
573 }
574
575 /*
576 * Trim the replacement buffer to the correct length
577 *
578 * buff_len includes \0.
579 * ...and as pcre2_substitute just succeeded actual_len does not include \0.
580 */
581 if (actual_len < (buff_len - 1)) {
582 buff = talloc_bstr_realloc(ctx, buff, actual_len);
583 if (!buff) {
584 fr_strerror_const("reallocing pcre2_substitute result buffer failed");
585 return -1;
586 }
587 }
588
589 if (regmatch) regmatch->used = ret;
590 *out = buff;
591
592 return 1;
593}
594
595
596/** Returns the number of subcapture groups
597 *
598 * @return
599 * - >0 The number of subcaptures contained within the pattern
600 * - 0 if the number of subcaptures can't be determined.
601 */
602uint32_t regex_subcapture_count(regex_t const *preg)
603{
605
606 if (pcre2_pattern_info(preg->compiled, PCRE2_INFO_CAPTURECOUNT, &count) != 0) {
607 fr_strerror_const("Error determining subcapture group count");
608 return 0;
609 }
610
611 return count + 1;
612}
613
614/** Free libpcre2's matchdata
615 *
616 * @note Don't call directly, will be called if talloc_free is called on a #regmatch_t.
617 */
618static int _pcre2_match_data_free(fr_regmatch_t *regmatch)
619{
620 pcre2_match_data_free(regmatch->match_data);
621 return 0;
622}
623
624/** Allocate vectors to fill with match data
625 *
626 * @param[in] ctx to allocate match vectors in.
627 * @param[in] count The number of vectors to allocate.
628 * @return
629 * - NULL on error.
630 * - Array of match vectors.
631 */
632fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
633{
634 fr_regmatch_t *regmatch;
635
636 /*
637 * Thread local initialisation
638 */
639 if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return NULL;
640
641 regmatch = talloc(ctx, fr_regmatch_t);
642 if (!regmatch) {
643 oom:
644 fr_strerror_const("Out of memory");
645 return NULL;
646 }
647
648 regmatch->match_data = pcre2_match_data_create(count, fr_pcre2_tls->gcontext);
649 if (!regmatch->match_data) {
650 talloc_free(regmatch);
651 goto oom;
652 }
653 talloc_set_type(regmatch->match_data, pcre2_match_data);
654
655 talloc_set_destructor(regmatch, _pcre2_match_data_free);
656
657 return regmatch;
658}
659
660/*
661 *######################################
662 *# FUNCTIONS FOR POSIX-REGEX #
663 *######################################
664 */
665# else
666/*
667 * Wrapper functions for POSIX like, and extended regular
668 * expressions. These use the system regex library.
669 */
670
/** Talloc destructor for a POSIX regex_t
 *
 * The regex_t lives on the heap so that regex_compile can share one
 * signature between the POSIX and libpcre implementations.
 *
 * @param preg	being freed.
 * @return 0 always.
 */
static int _regex_free(regex_t *preg)
{
	regfree(preg);	/* Release whatever regcomp allocated internally */

	return 0;
}
684
685/** Binary safe wrapper around regcomp
686 *
687 * If we have the BSD extensions we don't need to do any special work
688 * if we don't have the BSD extensions we need to check to see if the
689 * regular expression contains any \0 bytes.
690 *
691 * If it does we fail and print the appropriate error message.
692 *
693 * @note Compiled expression must be freed with talloc_free.
694 *
695 * @param[in] ctx To allocate memory in.
696 * @param[out] out Where to write out a pointer
697 * to the structure containing the compiled expression.
698 * @param[in] pattern to compile.
699 * @param[in] len of pattern.
700 * @param[in] flags controlling matching. May be NULL.
701 * @param[in] subcaptures Whether to compile the regular expression
702 * to store subcapture data.
703 * @param[in] runtime Whether the compilation is being done at runtime.
704 * @return
705 * - >= 1 on success.
706 * - <= 0 on error. Negative value is offset of parse error.
707 * With POSIX regex we only give the correct offset for embedded \0 errors.
708 */
709ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
710 fr_regex_flags_t const *flags, bool subcaptures, UNUSED bool runtime)
711{
712 int ret;
713 int cflags = REG_EXTENDED;
714 regex_t *preg;
715
716 if (len == 0) {
717 fr_strerror_const("Empty expression");
718 return 0;
719 }
720
721 /*
722 * Options
723 */
724 if (flags) {
725 if (flags->global) {
726 fr_strerror_const("g - Global matching/substitution not supported with posix-regex");
727 return 0;
728 }
729 if (flags->dot_all) {
730 fr_strerror_const("s - Single line matching is not supported with posix-regex");
731 return 0;
732 }
733 if (flags->unicode) {
734 fr_strerror_const("u - Unicode matching not supported with posix-regex");
735 return 0;
736 }
737 if (flags->extended) {
738 fr_strerror_const("x - Whitespace and comments not supported with posix-regex");
739 return 0;
740 }
741
742 if (flags->ignore_case) cflags |= REG_ICASE;
743 if (flags->multiline) cflags |= REG_NEWLINE;
744 }
745
746
747 if (!subcaptures) cflags |= REG_NOSUB;
748
749#ifndef HAVE_REGNCOMP
750 {
751 char const *p;
752
753 p = pattern;
754 p += strlen(pattern);
755
756 if ((size_t)(p - pattern) != len) {
757 fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation",
758 (p - pattern));
759 return -(p - pattern);
760 }
761
762 preg = talloc_zero(ctx, regex_t);
763 if (!preg) return 0;
764
765 ret = regcomp(preg, pattern, cflags);
766 }
767#else
768 preg = talloc_zero(ctx, regex_t);
769 if (!preg) return 0;
770 ret = regncomp(preg, pattern, len, cflags);
771#endif
772 if (ret != 0) {
773 char errbuf[128];
774
775 regerror(ret, preg, errbuf, sizeof(errbuf));
776 fr_strerror_printf("%s", errbuf);
777
778 talloc_free(preg);
779
780 return 0; /* POSIX expressions don't give us the failure offset */
781 }
782
783 talloc_set_destructor(preg, _regex_free);
784 *out = preg;
785
786 return len;
787}
788
789/** Binary safe wrapper around regexec
790 *
791 * If we have the BSD extensions we don't need to do any special work
792 * If we don't have the BSD extensions we need to check to see if the
793 * value to be compared contains any \0 bytes.
794 *
795 * If it does, we fail and print the appropriate error message.
796 *
797 * @param[in] preg The compiled expression.
798 * @param[in] subject to match.
799 * @param[in] regmatch Match result structure.
800 * @return
801 * - -1 on failure.
802 * - 0 on no match.
803 * - 1 on match.
804 */
805int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
806{
807 int ret;
808 size_t matches;
809
810 /*
811 * Disable capturing
812 */
813 if (!regmatch) {
814 matches = 0;
815 } else {
816 matches = regmatch->allocd;
817
818 /*
819 * Reset the match result structure
820 */
821 memset(regmatch->match_data, 0, sizeof(regmatch->match_data[0]) * matches);
822 regmatch->used = 0;
823 }
824
825#ifndef HAVE_REGNEXEC
826 {
827 char const *p;
828
829 p = subject;
830 p += strlen(subject);
831
832 if ((size_t)(p - subject) != len) {
833 fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation",
834 (p - subject));
835 if (regmatch) regmatch->used = 0;
836 return -1;
837 }
838 ret = regexec(preg, subject, matches, regmatch ? regmatch->match_data : NULL, 0);
839 }
840#else
841 ret = regnexec(preg, subject, len, matches, regmatch ? regmatch->match_data : NULL, 0);
842#endif
843 if (ret != 0) {
844 if (ret != REG_NOMATCH) {
845 char errbuf[128];
846
847 regerror(ret, preg, errbuf, sizeof(errbuf));
848
849 fr_strerror_printf("regex evaluation failed: %s", errbuf);
850 return -1;
851 }
852 return 0;
853 }
854
855 /*
856 * Update regmatch->count to be the maximum number of
857 * groups that *could* have been populated as we don't
858 * have the number of matches.
859 */
860 if (regmatch) {
861 regmatch->used = preg->re_nsub + 1;
862
863 if (regmatch->subject) talloc_const_free(regmatch->subject);
864 regmatch->subject = talloc_bstrndup(regmatch, subject, len);
865 if (!regmatch->subject) {
866 fr_strerror_const("Out of memory");
867 return -1;
868 }
869 }
870 return 1;
871}
872
873/** Returns the number of subcapture groups
874 *
875 * @return
876 * - 0 we can't determine this for POSIX regular expressions.
877 */
878uint32_t regex_subcapture_count(UNUSED regex_t const *preg)
879{
880 return 0;
881}
882# endif
883
884# if defined(HAVE_REGEX_POSIX)
885/** Allocate vectors to fill with match data
886 *
887 * @param[in] ctx to allocate match vectors in.
888 * @param[in] count The number of vectors to allocate.
889 * @return
890 * - NULL on error.
891 * - Array of match vectors.
892 */
893fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
894{
895 fr_regmatch_t *regmatch;
896
897 /*
898 * Pre-allocate space for the match structure
899 * and for a 128b subject string.
900 */
901 regmatch = talloc_zero_pooled_object(ctx, fr_regmatch_t, 2, (sizeof(regmatch_t) * count) + 128);
902 if (unlikely(!regmatch)) {
903 error:
904 fr_strerror_const("Out of memory");
905 talloc_free(regmatch);
906 return NULL;
907 }
908 regmatch->match_data = talloc_array(regmatch, regmatch_t, count);
909 if (unlikely(!regmatch->match_data)) goto error;
910
911 regmatch->allocd = count;
912 regmatch->used = 0;
913 regmatch->subject = NULL;
914
915 return regmatch;
916}
917# endif
918
919/*
920 *########################################
921 *# UNIVERSAL FUNCTIONS #
922 *########################################
923 */
924
/** Parse a string containing one or more regex flags
 *
 * Recognised flag characters are 'g' (global), 'i' (ignore_case),
 * 'm' (multiline), 's' (dot_all), 'u' (unicode) and 'x' (extended).
 *
 * Parsing works on a local copy of the input sbuff; the caller's sbuff
 * is only updated through FR_SBUFF_SET_RETURN.
 *
 * @param[out] err		May be NULL.  If not NULL will be set to:
 *				- 0 on success.
 *				- -1 on unknown flag.
 *				- -2 on duplicate.
 * @param[out] out		Flag structure to populate.  Must be initialised to zero
 *				if this is the first call to regex_flags_parse.
 * @param[in] in		Flag string to parse.
 * @param[in] terminals		Terminal characters.  If parsing ends before the buffer
 *				is exhausted, and is pointing to one of these chars
 *				it's not considered an error.
 * @param[in] err_on_dup	Error if the flag is already set.
 * @return
 *	- > 0 on success.  The number of flag bytes parsed.
 *	- <= 0 on failure.  Negative offset of first unrecognised flag.
 */
fr_slen_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in,
			    fr_sbuff_term_t const *terminals, bool err_on_dup)
{
	fr_sbuff_t our_in = FR_SBUFF(in);

	if (err) *err = 0;

	while (fr_sbuff_extend(&our_in)) {
		switch (*our_in.p) {
		/*
		 *	Expands to one case label per flag character, which
		 *	rejects a repeated flag when err_on_dup is set and
		 *	otherwise sets the matching field in out.
		 */
#define DO_REGEX_FLAG(_f, _c) \
		case _c: \
			if (err_on_dup && out->_f) { \
				fr_strerror_printf("Duplicate regex flag '%c'", *our_in.p); \
				if (err) *err = -2; \
				FR_SBUFF_ERROR_RETURN(&our_in); \
			} \
			out->_f = 1; \
			break

		DO_REGEX_FLAG(global, 'g');
		DO_REGEX_FLAG(ignore_case, 'i');
		DO_REGEX_FLAG(multiline, 'm');
		DO_REGEX_FLAG(dot_all, 's');
		DO_REGEX_FLAG(unicode, 'u');
		DO_REGEX_FLAG(extended, 'x');
#undef DO_REGEX_FLAG

		default:
			/* Stopping on a terminal is a normal end of the flag string */
			if (fr_sbuff_is_terminal(&our_in, terminals)) FR_SBUFF_SET_RETURN(in, &our_in);

			fr_strerror_printf("Unsupported regex flag '%c'", *our_in.p);
			if (err) *err = -1;
			FR_SBUFF_ERROR_RETURN(&our_in);
		}
		/* Only reached via the 'break' in a flag case: consume that flag */
		fr_sbuff_advance(&our_in, 1);
	}
	FR_SBUFF_SET_RETURN(in, &our_in);
}
980
981/** Print the flags
982 *
983 * @param[out] sbuff where to write flags.
984 * @param[in] flags to print.
985 * @return
986 * - The number of bytes written to the out buffer.
987 * - A number >= outlen if truncation has occurred.
988 */
989ssize_t regex_flags_print(fr_sbuff_t *sbuff, fr_regex_flags_t const *flags)
990{
991 fr_sbuff_t our_sbuff = FR_SBUFF(sbuff);
992
993#define DO_REGEX_FLAG(_f, _c) \
994 if (flags->_f) FR_SBUFF_IN_CHAR_RETURN(&our_sbuff, _c)
995
996 DO_REGEX_FLAG(global, 'g');
997 DO_REGEX_FLAG(ignore_case, 'i');
998 DO_REGEX_FLAG(multiline, 'm');
999 DO_REGEX_FLAG(dot_all, 's');
1000 DO_REGEX_FLAG(unicode, 'u');
1001 DO_REGEX_FLAG(extended, 'x');
1002#undef DO_REGEX_FLAG
1003
1004 FR_SBUFF_SET_RETURN(sbuff, &our_sbuff);
1005}
1006#endif
1007
1008/** Compare two boxes using an operator
1009 *
1010 * @todo - allow /foo/i on the RHS
1011 *
1012 * However, this involves allocating intermediate sbuffs for the
1013 * unescaped RHS, and all kinds of extra work. It's not overly hard,
1014 * but it's something we wish to avoid for now.
1015 *
1016 * @param[in] op to use in comparison. MUST be T_OP_REG_EQ or T_OP_REG_NE
1017 * @param[in] a Value to compare, MUST be FR_TYPE_STRING
1018 * @param[in] b uncompiled regex as FR_TYPE_STRING
1019 * @return
1020 * - 1 if true
1021 * - 0 if false
1022 * - -1 on failure.
1023 */
1025{
1026 int rcode;
1027 TALLOC_CTX *ctx = NULL;
1028 size_t lhs_len;
1029 char const *lhs;
1030 regex_t *regex = NULL;
1031
1032 if (!((op == T_OP_REG_EQ) || (op == T_OP_REG_NE))) {
1033 fr_strerror_const("Invalid operator for regex comparison");
1034 return -1;
1035 }
1036
1037 if (b->type != FR_TYPE_STRING) {
1038 fr_strerror_const("RHS must be regular expression");
1039 return -1;
1040 }
1041
1042 ctx = talloc_init_const("regex_cmp_op");
1043 if (!ctx) return -1;
1044
1045 if ((a->type != FR_TYPE_STRING) && (a->type != FR_TYPE_OCTETS)) {
1046 fr_slen_t slen;
1047 char *p;
1048
1049 slen = fr_value_box_aprint(ctx, &p, a, NULL); /* no escaping */
1050 if (slen < 0) return slen;
1051
1052 lhs = p;
1053 lhs_len = slen;
1054
1055 } else {
1056 lhs = a->vb_strvalue;
1057 lhs_len = a->vb_length;
1058 }
1059
1060 if (regex_compile(ctx, &regex, b->vb_strvalue, b->vb_length, NULL, false, true) < 0) {
1061 talloc_free(ctx);
1062 return -1;
1063 }
1064
1065#ifdef STATIC_ANALYZER
1066 if (!regex) {
1067 talloc_free(ctx);
1068 return -1;
1069 }
1070#endif
1071
1072 rcode = regex_exec(regex, lhs, lhs_len, NULL);
1073 talloc_free(ctx);
1074 if (rcode < 0) return rcode;
1075
1076 /*
1077 * Invert the sense of the rcode for !~
1078 */
1079 if (op == T_OP_REG_NE) rcode = (rcode == 0);
1080
1081 return rcode;
1082}
#define fr_atexit_thread_local(_name, _free, _uctx)
Definition atexit.h:220
#define RCSID(id)
Definition build.h:506
#define unlikely(_x)
Definition build.h:402
#define UNUSED
Definition build.h:336
static fr_slen_t err
Definition dict.h:882
static fr_slen_t in
Definition dict.h:882
talloc_free(hp)
@ FR_TYPE_STRING
String of printable characters.
@ FR_TYPE_OCTETS
Raw octets.
unsigned int uint32_t
long int ssize_t
unsigned char uint8_t
ssize_t fr_slen_t
bool fr_sbuff_is_terminal(fr_sbuff_t *in, fr_sbuff_term_t const *tt)
Efficient terminal string search.
Definition sbuff.c:2193
#define fr_sbuff_extend(_sbuff_or_marker)
#define FR_SBUFF_ERROR_RETURN(_sbuff_or_marker)
#define FR_SBUFF_SET_RETURN(_dst, _src)
#define SBUFF_CHAR_UNPRINTABLES_EXTENDED
#define FR_SBUFF(_sbuff_or_marker)
#define fr_sbuff_advance(_sbuff_or_marker, _len)
#define SBUFF_CHAR_UNPRINTABLES_LOW
Set of terminal elements.
static char buff[sizeof("18446744073709551615")+3]
Definition size_tests.c:41
return count
Definition module.c:155
char * talloc_bstr_realloc(TALLOC_CTX *ctx, char *in, size_t inlen)
Trim a bstr (char) buffer.
Definition talloc.c:681
char * talloc_bstrndup(TALLOC_CTX *ctx, char const *in, size_t inlen)
Binary safe strndup function.
Definition talloc.c:617
#define talloc_zero_pooled_object(_ctx, _type, _num_subobjects, _total_subobjects_size)
Definition talloc.h:201
static int talloc_const_free(void const *ptr)
Free const'd memory.
Definition talloc.h:253
static TALLOC_CTX * talloc_init_const(char const *name)
Allocate a top level chunk with a constant name.
Definition talloc.h:120
enum fr_token fr_token_t
@ T_OP_REG_EQ
Definition token.h:100
@ T_OP_REG_NE
Definition token.h:101
int fr_regex_cmp_op(fr_token_t op, fr_value_box_t const *a, fr_value_box_t const *b)
Compare two boxes using an operator.
Definition regex.c:1024
#define fr_strerror_printf(_fmt,...)
Log to thread local error buffer.
Definition strerror.h:64
#define fr_strerror_const(_msg)
Definition strerror.h:223
static fr_slen_t fr_value_box_aprint(TALLOC_CTX *ctx, char **out, fr_value_box_t const *data, fr_sbuff_escape_rules_t const *e_rules) 1(fr_value_box_print
static size_t char ** out
Definition value.h:1030