All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
regex.c
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15  */
16 
17 /**
18  * @file lib/regex.c
19  * @brief regex abstraction functions
20  *
21  * @copyright 2014 The FreeRADIUS server project
22  * @copyright 2014 Arran Cudbard-Bell <a.cudbardb@freeradius.org>
23  */
24 
25 #ifdef HAVE_REGEX
26 #include <freeradius-devel/libradius.h>
27 #include <freeradius-devel/regex.h>
28 
29 /*
30  * Wrapper functions for libpcre. These are much more powerful and are
31  * guaranteed to be binary safe, but they require libpcre.
32  */
33 # ifdef HAVE_PCRE
34 /** Free regex_t structure
35  *
36  * Calls libpcre specific free functions for the expression and study.
37  *
38  * @param preg to free.
39  */
40 static int _regex_free(regex_t *preg)
41 {
42  if (preg->compiled) pcre_free(preg->compiled);
43 #ifdef PCRE_CONFIG_JIT
44  if (preg->extra) pcre_free_study(preg->extra);
45 #else
46  if (preg->extra) pcre_free(preg->extra);
47 #endif
48 
49  return 0;
50 }
51 
52 /*
53  * Replace the libpcre malloc and free functions with
54  * talloc wrappers. This allows us to use the subcapture copy
55  * functions and just reparent the memory allocated.
56  */
57 static void *_pcre_malloc(size_t to_alloc) {
58  return talloc_array(NULL, uint8_t, to_alloc);
59 }
60 
/** Release hook assigned to pcre_free by regex_compile
 *
 * @param to_free chunk to hand to talloc_free.
 */
static void _pcre_free(void *to_free)
{
	(void) talloc_free(to_free);
}
64 
65 /** Wrapper around pcre_compile
66  *
67  * Allows the rest of the code to do compilations using one function signature.
68  *
69  * @note Compiled expression must be freed with talloc_free.
70  *
71  * @param out Where to write out a pointer to the structure containing the compiled expression.
72  * @param pattern to compile.
73  * @param len of pattern.
74  * @param ignore_case whether to do case insensitive matching.
75  * @param multiline If true $ matches newlines.
76  * @param subcaptures Whether to compile the regular expression to store subcapture
77  * data.
78  * @param runtime If false run the pattern through the PCRE JIT to convert it to machine code.
79  * This trades startup time (longer) for runtime performance (better).
80  * @return
81  * - >= 1 on success.
82  * - <= 0 on error. Negative value is offset of parse error.
83  */
84 ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
85  bool ignore_case, bool multiline, bool subcaptures, bool runtime)
86 {
87  char const *error;
88  int offset;
89  int cflags = 0;
90  regex_t *preg;
91 
92  static bool setup;
93 
94  /*
95  * Lets us use subcapture copy
96  */
97  if (!setup) {
98  pcre_malloc = _pcre_malloc;
99  pcre_free = _pcre_free;
100  }
101 
102  *out = NULL;
103 
104  if (len == 0) {
105  fr_strerror_printf("Empty expression");
106  return 0;
107  }
108 
109  if (ignore_case) cflags |= PCRE_CASELESS;
110  if (multiline) cflags |= PCRE_MULTILINE;
111  if (!subcaptures) cflags |= PCRE_NO_AUTO_CAPTURE;
112 
113  preg = talloc_zero(ctx, regex_t);
114  talloc_set_destructor(preg, _regex_free);
115 
116  preg->compiled = pcre_compile(pattern, cflags, &error, &offset, NULL);
117  if (!preg->compiled) {
118  talloc_free(preg);
119  fr_strerror_printf("Pattern compilation failed: %s", error);
120 
121  return -(ssize_t)offset;
122  }
123  if (!runtime) {
124  preg->precompiled = true;
125  preg->extra = pcre_study(preg->compiled, PCRE_STUDY_JIT_COMPILE, &error);
126  if (error) {
127  talloc_free(preg);
128  fr_strerror_printf("Pattern study failed: %s", error);
129 
130  return 0;
131  }
132  }
133 
134  *out = preg;
135 
136  return len;
137 }
138 
139 static const FR_NAME_NUMBER regex_pcre_error_str[] = {
140  { "PCRE_ERROR_NOMATCH", PCRE_ERROR_NOMATCH },
141  { "PCRE_ERROR_NULL", PCRE_ERROR_NULL },
142  { "PCRE_ERROR_BADOPTION", PCRE_ERROR_BADOPTION },
143  { "PCRE_ERROR_BADMAGIC", PCRE_ERROR_BADMAGIC },
144  { "PCRE_ERROR_UNKNOWN_OPCODE", PCRE_ERROR_UNKNOWN_OPCODE },
145  { "PCRE_ERROR_NOMEMORY", PCRE_ERROR_NOMEMORY },
146  { "PCRE_ERROR_NOSUBSTRING", PCRE_ERROR_NOSUBSTRING },
147  { "PCRE_ERROR_MATCHLIMIT", PCRE_ERROR_MATCHLIMIT },
148  { "PCRE_ERROR_CALLOUT", PCRE_ERROR_CALLOUT },
149  { "PCRE_ERROR_BADUTF8", PCRE_ERROR_BADUTF8 },
150  { "PCRE_ERROR_BADUTF8_OFFSET", PCRE_ERROR_BADUTF8_OFFSET },
151  { "PCRE_ERROR_PARTIAL", PCRE_ERROR_PARTIAL },
152  { "PCRE_ERROR_BADPARTIAL", PCRE_ERROR_BADPARTIAL },
153  { "PCRE_ERROR_INTERNAL", PCRE_ERROR_INTERNAL },
154  { "PCRE_ERROR_BADCOUNT", PCRE_ERROR_BADCOUNT },
155  { "PCRE_ERROR_DFA_UITEM", PCRE_ERROR_DFA_UITEM },
156  { "PCRE_ERROR_DFA_UCOND", PCRE_ERROR_DFA_UCOND },
157  { "PCRE_ERROR_DFA_UMLIMIT", PCRE_ERROR_DFA_UMLIMIT },
158  { "PCRE_ERROR_DFA_WSSIZE", PCRE_ERROR_DFA_WSSIZE },
159  { "PCRE_ERROR_DFA_RECURSE", PCRE_ERROR_DFA_RECURSE },
160  { "PCRE_ERROR_RECURSIONLIMIT", PCRE_ERROR_RECURSIONLIMIT },
161  { "PCRE_ERROR_NULLWSLIMIT", PCRE_ERROR_NULLWSLIMIT },
162  { "PCRE_ERROR_BADNEWLINE", PCRE_ERROR_BADNEWLINE },
163  { NULL, 0 }
164 };
165 
166 /** Wrapper around pcre_exec
167  *
168  * @param preg The compiled expression.
169  * @param subject to match.
170  * @param len Length of subject.
171  * @param pmatch Array of match pointers.
172  * @param nmatch How big the match array is. Updated to number of matches.
173  * @return
174  * - -1 on failure.
175  * - 0 on no match.
176  * - 1 on match.
177  */
178 int regex_exec(regex_t *preg, char const *subject, size_t len, regmatch_t pmatch[], size_t *nmatch)
179 {
180  int ret;
181  size_t matches;
182 
183  /*
184  * PCRE_NO_AUTO_CAPTURE is a compile time only flag,
185  * and can't be passed here.
186  * We rely on the fact that matches has been set to
187  * 0 as a hint that no subcapture data should be
188  * generated.
189  */
190  if (!pmatch || !nmatch) {
191  pmatch = NULL;
192  if (nmatch) *nmatch = 0;
193  matches = 0;
194  } else {
195  matches = *nmatch;
196  }
197 
198  ret = pcre_exec(preg->compiled, preg->extra, subject, len, 0, 0, (int *)pmatch, matches * 3);
199  if (ret < 0) {
200  if (ret == PCRE_ERROR_NOMATCH) return 0;
201 
202  fr_strerror_printf("regex evaluation failed with code (%i): %s", ret,
203  fr_int2str(regex_pcre_error_str, ret, "<INVALID>"));
204  return -1;
205  }
206 
207  /*
208  * 0 signifies more offsets than we provided space for,
209  * so don't touch nmatches.
210  */
211  if (nmatch && (ret > 0)) *nmatch = ret;
212 
213  return 1;
214 }
215 /*
216  * Wrapper functions for POSIX like, and extended regular
217  * expressions. These use the system regex library.
218  */
219 # else
/** Talloc destructor for a POSIX regex_t
 *
 * The regex_t is heap allocated so that regex_compile can share one
 * signature between the POSIX and libpcre implementations; this hands
 * the structure to regfree when the chunk is freed.
 *
 * @param preg to free.
 * @return always 0.
 */
static int _regex_free(regex_t *preg)
{
	regfree(preg);
	return 0;
}
233 
234 /** Binary safe wrapper around regcomp
235  *
236  * If we have the BSD extensions we don't need to do any special work
237  * if we don't have the BSD extensions we need to check to see if the
238  * regular expression contains any \0 bytes.
239  *
240  * If it does we fail and print the appropriate error message.
241  *
242  * @note Compiled expression must be freed with talloc_free.
243  *
244  * @param ctx To allocate memory in.
245  * @param out Where to write out a pointer to the structure containing the
246  * compiled expression.
247  * @param pattern to compile.
248  * @param len of pattern.
249  * @param ignore_case Whether the match should be case ignore_case.
250  * @param multiline If true $ matches newlines.
251  * @param subcaptures Whether to compile the regular expression to store subcapture
252  * data.
253  * @param runtime Whether the compilation is being done at runtime.
254  * @return
255  * - >= 1 on success.
256  * - <= 0 on error. Negative value is offset of parse error.
257  * With POSIX regex we only give the correct offset for embedded \0 errors.
258  */
259 ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
260  bool ignore_case, bool multiline, bool subcaptures, UNUSED bool runtime)
261 {
262  int ret;
263  int cflags = REG_EXTENDED;
264  regex_t *preg;
265 
266  if (len == 0) {
267  fr_strerror_printf("Empty expression");
268  return 0;
269  }
270 
271  if (ignore_case) cflags |= REG_ICASE;
272  if (multiline) cflags |= REG_NEWLINE;
273  if (!subcaptures) cflags |= REG_NOSUB;
274 
275 #ifndef HAVE_REGNCOMP
276  {
277  char const *p;
278 
279  p = pattern;
280  p += strlen(pattern);
281 
282  if ((size_t)(p - pattern) != len) {
283  fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation",
284  (p - pattern));
285  return -(p - pattern);
286  }
287 
288  preg = talloc_zero(ctx, regex_t);
289  if (!preg) return 0;
290 
291  ret = regcomp(preg, pattern, cflags);
292  }
293 #else
294  preg = talloc_zero(ctx, regex_t);
295  if (!preg) return 0;
296  ret = regncomp(preg, pattern, len, cflags);
297 #endif
298  if (ret != 0) {
299  char errbuf[128];
300 
301  regerror(ret, preg, errbuf, sizeof(errbuf));
302  fr_strerror_printf("Pattern compilation failed: %s", errbuf);
303 
304  talloc_free(preg);
305 
306  return 0; /* POSIX expressions don't give us the failure offset */
307  }
308 
309  talloc_set_destructor(preg, _regex_free);
310  *out = preg;
311 
312  return len;
313 }
314 
/** Binary safe wrapper around regexec
 *
 * If we have the BSD extensions (regnexec) the subject and its length are
 * passed straight through.
 *
 * If we don't have the BSD extensions, the subject is rejected when its C
 * string length differs from len (e.g. it contains an embedded \0 byte), and
 * an appropriate error message is set.
 *
 * @param preg The compiled expression.
 * @param subject to match.
 * @param len Length of subject.
 * @param pmatch Array of match pointers. May be NULL to disable capturing.
 * @param nmatch How big the match array is. Updated to number of matches.
 *	May be NULL to disable capturing.
 * @return
 *	- -1 on failure.
 *	- 0 on no match.
 *	- 1 on match.
 */
int regex_exec(regex_t *preg, char const *subject, size_t len, regmatch_t pmatch[], size_t *nmatch)
{
	int	ret;
	size_t	matches;

	/*
	 *	Disable capturing
	 */
	if (!pmatch || !nmatch) {
		pmatch = NULL;
		if (nmatch) *nmatch = 0;
		matches = 0;
	} else {
		/* regexec does not seem to initialise unused elements */
		matches = *nmatch;
		memset(pmatch, 0, sizeof(pmatch[0]) * matches);
	}

#ifndef HAVE_REGNEXEC
	{
		/*
		 *	regexec stops at the first \0, so refuse anything
		 *	where that isn't at the expected length.
		 */
		size_t nul_offset = strlen(subject);

		if (nul_offset != len) {
			fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation",
					   nul_offset);
			return -1;
		}
		ret = regexec(preg, subject, matches, pmatch, 0);
	}
#else
	ret = regnexec(preg, subject, len, matches, pmatch, 0);
#endif
	if (ret != 0) {
		if (ret != REG_NOMATCH) {
			char errbuf[128];

			regerror(ret, preg, errbuf, sizeof(errbuf));

			fr_strerror_printf("regex evaluation failed: %s", errbuf);
			if (nmatch) *nmatch = 0;
			return -1;
		}
		return 0;
	}

	/*
	 *	Update *nmatch to be the maximum number of
	 *	groups that *could* have been populated,
	 *	need to check them later.
	 */
	if (nmatch && (*nmatch > preg->re_nsub)) *nmatch = preg->re_nsub + 1;

	return 1;
}
389 # endif
390 #endif
#define UNUSED
Definition: libradius.h:134
void fr_strerror_printf(char const *,...) CC_HINT(format(printf
char const * fr_int2str(FR_NAME_NUMBER const *table, int number, char const *def)
Definition: token.c:506