The FreeRADIUS server  $Id: 15bac2a4c627c01d1aa2047687b3418955ac7f00 $
token.c
Go to the documentation of this file.
1 /*
2  * This library is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU Lesser General Public
4  * License as published by the Free Software Foundation; either
5  * version 2.1 of the License, or (at your option) any later version.
6  *
7  * This library is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10  * Lesser General Public License for more details.
11  *
12  * You should have received a copy of the GNU Lesser General Public
13  * License along with this library; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15  */
16 
17 /** Tokenisation code and constants
18  *
19  * This is mostly for the attribute filter and user files.
20  *
21  * @file src/lib/util/token.c
22  *
23  * @copyright 2001,2006 The FreeRADIUS server project
24  */
25 RCSID("$Id: 8acd36299d8ad726735ceffea5a6ed233cebe9fc $")
26 
27 #include <freeradius-devel/util/misc.h>
28 #include <freeradius-devel/util/strerror.h>
29 #include <freeradius-devel/util/token.h>
30 
31 #include <ctype.h>
32 
34  { L("=~"), T_OP_REG_EQ }, /* order is important! */
35  { L("!~"), T_OP_REG_NE },
36  { L("{"), T_LCBRACE },
37  { L("}"), T_RCBRACE },
38  { L("("), T_LBRACE },
39  { L(")"), T_RBRACE },
40  { L(","), T_COMMA },
41  { L("++"), T_OP_INCRM },
42  { L("+="), T_OP_ADD_EQ },
43  { L("-="), T_OP_SUB_EQ },
44  { L(":="), T_OP_SET },
45  { L("=*"), T_OP_CMP_TRUE },
46  { L("!*"), T_OP_CMP_FALSE },
47  { L("=="), T_OP_CMP_EQ },
48  { L("==="), T_OP_CMP_EQ_TYPE },
49  { L("^="), T_OP_PREPEND },
50  { L("|="), T_OP_OR_EQ },
51  { L("&="), T_OP_AND_EQ },
52  { L("="), T_OP_EQ },
53  { L("!="), T_OP_NE },
54  { L("!=="), T_OP_CMP_NE_TYPE },
55  { L(">>="), T_OP_RSHIFT_EQ },
56  { L(">="), T_OP_GE },
57  { L(">"), T_OP_GT },
58  { L("<<="), T_OP_LSHIFT_EQ },
59  { L("<="), T_OP_LE },
60  { L("<"), T_OP_LT },
61  { L("#"), T_HASH },
62  { L(";"), T_SEMICOLON }
63 };
65 
67  { L(""), T_BARE_WORD },
68  { L("'"), T_SINGLE_QUOTED_STRING },
69  { L("/"), T_SOLIDUS_QUOTED_STRING },
70  { L("\""), T_DOUBLE_QUOTED_STRING },
71  { L("`"), T_BACK_QUOTED_STRING }
72 };
74 
75 /*
76  * String versions for all of the tokens.
77  */
78 char const *fr_tokens[T_TOKEN_LAST] = {
79  [T_INVALID] = "?",
80  [T_EOL] = "EOL",
81 
82  [T_LCBRACE] = "{",
83  [T_RCBRACE] = "}",
84  [T_LBRACE] = "(",
85  [T_RBRACE] = ")",
86  [T_COMMA] = ",",
87  [T_SEMICOLON] = ";",
88 
89  [T_ADD] = "+",
90  [T_SUB] = "-",
91  [T_MUL] = "*",
92  [T_DIV] = "/",
93  [T_AND] = "&",
94  [T_OR] = "|",
95  [T_NOT] = "!",
96  [T_XOR] = "^",
97  [T_COMPLEMENT] = "~",
98  [T_MOD] = "%",
99 
100  [T_RSHIFT] = ">>",
101  [T_LSHIFT] = "<<",
102 
103  [T_LAND] = "&&",
104  [T_LOR] = "||",
105 
106  [T_OP_INCRM] = "++",
107 
108  [T_OP_ADD_EQ] = "+=",
109  [T_OP_SUB_EQ] = "-=",
110  [T_OP_SET] = ":=",
111  [T_OP_EQ] = "=",
112  [T_OP_OR_EQ] = "|=",
113  [T_OP_AND_EQ] = "&=",
114 
115  [T_OP_RSHIFT_EQ] = ">>=",
116  [T_OP_LSHIFT_EQ] = "<<=",
117 
118  [T_OP_NE] = "!=",
119  [T_OP_GE] = ">=",
120  [T_OP_GT] = ">",
121  [T_OP_LE] = "<=",
122  [T_OP_LT] = "<",
123  [T_OP_REG_EQ] = "=~",
124  [T_OP_REG_NE] = "!~",
125 
126  [T_OP_CMP_TRUE] = "=*",
127  [T_OP_CMP_FALSE] = "!*",
128 
129  [T_OP_CMP_EQ] = "==",
130 
131  [T_OP_CMP_EQ_TYPE] = "===",
132  [T_OP_CMP_NE_TYPE] = "!==",
133 
134  [T_OP_PREPEND] = "^=",
135 
136  [T_HASH] = "#",
137  [T_BARE_WORD] = "<BARE-WORD>",
138  [T_DOUBLE_QUOTED_STRING] = "<\"STRING\">",
139  [T_SINGLE_QUOTED_STRING] = "<'STRING'>",
140  [T_BACK_QUOTED_STRING] = "<`STRING`>",
141  [T_SOLIDUS_QUOTED_STRING] = "</STRING/>",
142 };
143 
144 
145 /*
146  * This is fine. Don't complain.
147  */
148 #ifdef __clang__
149 #pragma clang diagnostic ignored "-Wgnu-designator"
150 #endif
151 
152 /** Convert tokens back to a quoting character
153  *
154  * Non-string types convert to '?' to screw ups can be identified easily
155  */
157  [ 0 ... T_HASH ] = '?', /* GCC extension for range initialization, also allowed by clang */
158 
159  [T_BARE_WORD] = '\0',
160  [T_DOUBLE_QUOTED_STRING] = '"',
161  [T_SINGLE_QUOTED_STRING] = '\'',
162  [T_BACK_QUOTED_STRING] = '`',
163  [T_SOLIDUS_QUOTED_STRING] = '/',
164 };
165 
166 #define T(_x) [T_OP_ ## _x] = true
167 
169  T(INCRM), /* only used by LDAP :( */
170 
171  T(ADD_EQ),
172  T(SUB_EQ),
173  T(MUL_EQ),
174  T(DIV_EQ),
175  T(AND_EQ),
176  T(OR_EQ),
177  T(RSHIFT_EQ),
178  T(LSHIFT_EQ),
179 
180  T(SET),
181  T(EQ),
182  T(PREPEND),
183 };
184 
186  T(ADD_EQ), /* append */
187  T(SUB_EQ), /* remove */
188  T(AND_EQ), /* intersection */
189  T(OR_EQ), /* union */
190  T(LE), /* merge RHS */
191  T(GE), /* merge LHS */
192 
193  T(SET),
194  T(EQ),
195  T(PREPEND), /* prepend */
196 };
197 
199  T(NE),
200  T(GE),
201  T(GT),
202  T(LE),
203  T(LT),
204  T(REG_EQ),
205  T(REG_NE),
206  T(CMP_TRUE),
207  T(CMP_FALSE),
208  T(CMP_EQ),
209  T(CMP_EQ_TYPE),
210  T(CMP_NE_TYPE),
211 };
212 
213 #undef T
214 #define T(_x) [T_ ## _x] = true
215 
216 const bool fr_binary_op[T_TOKEN_LAST] = {
217  T(ADD),
218  T(SUB),
219  T(MUL),
220  T(DIV),
221  T(AND),
222  T(OR),
223  T(MOD),
224  T(RSHIFT),
225  T(LSHIFT),
226 };
227 
228 
229 #undef T
230 #define T(_x) [T_## _x] = true
231 const bool fr_str_tok[T_TOKEN_LAST] = {
232  T(BARE_WORD),
233  T(DOUBLE_QUOTED_STRING),
234  T(SINGLE_QUOTED_STRING),
235  T(BACK_QUOTED_STRING),
236 };
237 
238 /*
239  * This works only as long as special tokens
240  * are max. 2 characters, but it's fast.
241  */
242 #define TOKEN_MATCH(bptr, tptr) \
243  ( (tptr)[0] == (bptr)[0] && \
244  ((tptr)[1] == (bptr)[1] || (tptr)[1] == 0))
245 
246 /*
247  * Read a word from a buffer and advance pointer.
248  * This function knows about escapes and quotes.
249  *
250  * At end-of-line, buf[0] is set to '\0'.
251  * Returns 0 or special token value.
252  */
253 static fr_token_t getthing(char const **ptr, char *buf, int buflen, bool tok,
254  fr_table_num_ordered_t const *tokenlist, size_t tokenlist_len, bool unescape)
255 {
256  char *s;
257  char const *p;
258  char quote;
259  bool triple = false;
260  unsigned int x;
261  size_t i;
262  fr_token_t token;
263 
264  buf[0] = '\0';
265 
266  /* Skip whitespace */
267  p = *ptr;
268 
270 
271  if (!*p) {
272  *ptr = p;
273  return T_EOL;
274  }
275 
276  /*
277  * Might be a 1 or 2 character token.
278  */
279  if (tok) {
280  for (i = 0; i < tokenlist_len; i++) {
281  if (TOKEN_MATCH(p, tokenlist[i].name.str)) {
282  strcpy(buf, tokenlist[i].name.str);
283  p += tokenlist[i].name.len;
284 
285  /*
286  * Try to shut up Coverity, which claims fr_token_t can be between 0..63, not
287  * 0..48???
288  */
289  if ((tokenlist[i].value < 0) || (tokenlist[i].value >= T_TOKEN_LAST)) return T_INVALID;
290 
291  token = tokenlist[i].value;
292  goto done;
293  }
294  }
295  }
296 
297  /* Read word. */
298  quote = '\0';
299  switch (*p) {
300  default:
301  token = T_BARE_WORD;
302  break;
303 
304  case '\'':
305  token = T_SINGLE_QUOTED_STRING;
306  break;
307 
308  case '"':
309  token = T_DOUBLE_QUOTED_STRING;
310  break;
311 
312  case '`':
313  token = T_BACK_QUOTED_STRING;
314  break;
315  }
316 
317  if (token != T_BARE_WORD) {
318  quote = *p;
319 
320  /*
321  * Triple-quoted strings are copied over verbatim, without escapes.
322  */
323  if ((buflen >= 3) && (p[1] == quote) && (p[2] == quote)) {
324  p += 3;
325  triple = true;
326  }
327 
328  p++;
329  }
330  s = buf;
331 
332  while (*p && buflen-- > 1) {
333  /*
334  * We're looking for strings. Stop on spaces, or
335  * (if given a token list), on a token, or on a
336  * comma.
337  */
338  if (!quote) {
339  if (isspace((uint8_t) *p)) break;
340 
341 
342  if (tok) {
343  for (i = 0; i < tokenlist_len; i++) {
344  if (TOKEN_MATCH(p, tokenlist[i].name.str)) {
345  *s++ = 0;
346  goto done;
347  }
348  }
349  }
350  if (*p == ',') break;
351 
352  /*
353  * Copy the character over.
354  */
355  *s++ = *p++;
356  continue;
357  } /* else there was a quotation character */
358 
359  /*
360  * Un-escaped quote character. We're done.
361  */
362  if (*p == quote) {
363  if (!triple) {
364  p++;
365  *s++ = 0;
366  goto done;
367  }
368 
369  if ((buflen >= 3) && (p[1] == quote) && (p[2] == quote)) {
370  p += 3;
371  *s++ = 0;
372  goto done;
373  }
374 
375  *s++ = *p++;
376  continue;
377  }
378 
379  /*
380  * Everything but backslash gets copied over.
381  */
382  if (*p != '\\') {
383  *s++ = *p++;
384  continue;
385  }
386 
387  /*
388  * There's nothing after the backslash, it's an error.
389  */
390  if (!p[1]) {
391  fr_strerror_const("Unterminated string");
392  return T_INVALID;
393  }
394 
395  if (unescape) {
396  p++;
397 
398  switch (*p) {
399  case 'r':
400  *s++ = '\r';
401  break;
402  case 'n':
403  *s++ = '\n';
404  break;
405  case 't':
406  *s++ = '\t';
407  break;
408 
409  default:
410  if (*p >= '0' && *p <= '9' &&
411  sscanf(p, "%3o", &x) == 1) {
412  *s++ = x;
413  p += 2;
414  } else
415  *s++ = *p;
416  break;
417  }
418  p++;
419 
420  } else {
421  /*
422  * Convert backslash-quote to quote, but
423  * leave everything else alone.
424  */
425  if (p[1] == quote) { /* convert '\'' --> ' */
426  p++;
427  } else {
428  if (buflen < 2) {
429  fr_strerror_const("Truncated input");
430  return T_INVALID;
431  }
432 
433  *(s++) = *(p++);
434  }
435  *(s++) = *(p++);
436  }
437  }
438 
439  *s++ = 0;
440 
441  if (quote) {
442  fr_strerror_const("Unterminated string");
443  return T_INVALID;
444  }
445 
446 done:
447  /* Skip whitespace again. */
449 
450  *ptr = p;
451 
452  return token;
453 }
454 
455 /*
456  * Read a "word" - this means we don't honor
457  * tokens as delimiters.
458  */
459 int getword(char const **ptr, char *buf, int buflen, bool unescape)
460 {
461  return getthing(ptr, buf, buflen, false, fr_tokens_table, fr_tokens_table_len, unescape) == T_EOL ? 0 : 1;
462 }
463 
464 
465 /*
466  * Read the next word, use tokens as delimiters.
467  */
468 fr_token_t gettoken(char const **ptr, char *buf, int buflen, bool unescape)
469 {
470  return getthing(ptr, buf, buflen, true, fr_tokens_table, fr_tokens_table_len, unescape);
471 }
472 
473 /*
474  * Expect an operator.
475  */
476 fr_token_t getop(char const **ptr)
477 {
478  char op[3];
479  fr_token_t token;
480 
481  token = getthing(ptr, op, sizeof(op), true, fr_tokens_table, fr_tokens_table_len, false);
482  if (!fr_assignment_op[token] && !fr_comparison_op[token]) {
483  fr_strerror_const("Expected operator");
484  return T_INVALID;
485  }
486  return token;
487 }
488 
489 /*
490  * Expect a string.
491  */
492 fr_token_t getstring(char const **ptr, char *buf, int buflen, bool unescape)
493 {
494  char const *p;
495 
496  if (!ptr || !*ptr || !buf) return T_INVALID;
497 
498  p = *ptr;
499 
501 
502  *ptr = p;
503 
504  if ((*p == '"') || (*p == '\'') || (*p == '`')) {
505  return gettoken(ptr, buf, buflen, unescape);
506  }
507 
508  return getthing(ptr, buf, buflen, false, fr_tokens_table, fr_tokens_table_len, unescape);
509 }
510 
511 char const *fr_token_name(int token)
512 {
513  return fr_table_str_by_value(fr_tokens_table, token, "<INVALID>");
514 }
515 
516 
/** Skip a quoted string.
 *
 * The first character of the input is taken as the quotation character, and
 * the string ends at the next un-escaped occurrence of that character.
 *
 * When end is given, no byte at or after end is examined.
 *
 * @param[in] start	start of the string, pointing to the quotation character
 * @param[in] end	end of the string (or NULL for zero-terminated strings)
 * @return
 *	>0 length of the string which was parsed, including both quotation characters
 *	<=0 on error, the negative offset at which parsing stopped
 */
ssize_t fr_skip_string(char const *start, char const *end)
{
	char const *p = start;
	char quote;

	/*
	 *	An empty bounded input doesn't even have an opening quote.
	 */
	if (end && (start >= end)) {
		fr_strerror_const("Unexpected end of string");
		return 0;
	}

	quote = *(p++);

	/*
	 *	Bounded input is limited by 'end' alone.  Unbounded input
	 *	is limited by its terminating NUL.
	 */
	while (end ? (p < end) : (*p != '\0')) {
		/*
		 *	Stop at the quotation character
		 */
		if (*p == quote) {
			p++;
			return p - start;
		}

		/*
		 *	Not an escape character: it's OK.
		 */
		if (*p != '\\') {
			p++;
			continue;
		}

		/*
		 *	There has to be an escaped character, and then
		 *	something after it.
		 */
		if (end && ((p + 2) >= end)) goto bad_escape;

		/*
		 *	Escape at EOL is not allowed.  Compare as unsigned,
		 *	so that bytes >= 0x80 aren't seen as control
		 *	characters when char is signed.
		 */
		if ((uint8_t) p[1] < ' ') goto bad_escape;

		/*
		 *	\r or \n, etc.
		 */
		if (!isdigit((uint8_t) p[1])) {
			p += 2;
			continue;
		}

		/*
		 *	Double-quoted strings use \000
		 *	Regexes use \0
		 */
		if (quote == '/') {
			p++;
			continue;
		}

		/*
		 *	Allow for \1f in single quoted strings.  That is
		 *	three bytes, so it only needs p[2] to be in bounds.
		 */
		if ((quote == '\'') && (!end || ((p + 3) < end)) &&
		    isxdigit((uint8_t) p[1]) && isxdigit((uint8_t) p[2])) {
			p += 3;
			continue;
		}

		/*
		 *	An octal escape is four bytes, and needs something
		 *	after it.
		 */
		if (end && ((p + 4) >= end)) goto bad_escape;

		if (!isdigit((uint8_t) p[2]) || !isdigit((uint8_t) p[3])) {
			fr_strerror_const("Invalid octal escape");
			return -(p - start);
		}

		p += 4;
	}

	/*
	 *	Unexpected end of string.
	 */
	fr_strerror_const("Unexpected end of string");
	return -(p - start);

bad_escape:
	fr_strerror_const("Unexpected escape at end of string");
	return -(p - start);
}
601 
strcpy(log_entry->msg, buffer)
#define RCSID(id)
Definition: build.h:481
#define L(_str)
Helper for initialising arrays of string literals.
Definition: build.h:207
#define NUM_ELEMENTS(_t)
Definition: build.h:335
Test enumeration values.
Definition: dict_test.h:92
long int ssize_t
Definition: merged_model.c:24
unsigned char uint8_t
Definition: merged_model.c:30
#define fr_skip_whitespace(_p)
Skip whitespace ('\t', '\n', '\v', '\f', '\r', ' ')
Definition: misc.h:59
static bool done
Definition: radclient.c:80
static char const * name
#define fr_table_str_by_value(_table, _number, _def)
Convert an integer to a string.
Definition: table.h:772
fr_table_elem_name_t name
Definition: table.h:58
size_t len
Literal string length.
Definition: table.h:43
An element in an arbitrarily ordered array of name to num mappings.
Definition: table.h:57
An element in a lexicographically sorted array of name to num mappings.
Definition: table.h:49
#define MOD(a, b)
const bool fr_assignment_op[T_TOKEN_LAST]
Definition: token.c:168
const bool fr_list_assignment_op[T_TOKEN_LAST]
Definition: token.c:185
char const * fr_token_name(int token)
Definition: token.c:511
#define TOKEN_MATCH(bptr, tptr)
Definition: token.c:242
ssize_t fr_skip_string(char const *start, char const *end)
Skip a quoted string.
Definition: token.c:525
fr_token_t gettoken(char const **ptr, char *buf, int buflen, bool unescape)
Definition: token.c:468
fr_table_num_ordered_t const fr_tokens_table[]
Definition: token.c:33
static fr_token_t getthing(char const **ptr, char *buf, int buflen, bool tok, fr_table_num_ordered_t const *tokenlist, size_t tokenlist_len, bool unescape)
Definition: token.c:253
const bool fr_str_tok[T_TOKEN_LAST]
Definition: token.c:231
fr_table_num_sorted_t const fr_token_quotes_table[]
Definition: token.c:66
size_t fr_token_quotes_table_len
Definition: token.c:73
size_t fr_tokens_table_len
Definition: token.c:64
fr_token_t getop(char const **ptr)
Definition: token.c:476
const char fr_token_quote[T_TOKEN_LAST]
Convert tokens back to a quoting character.
Definition: token.c:156
char const * fr_tokens[T_TOKEN_LAST]
Definition: token.c:78
fr_token_t getstring(char const **ptr, char *buf, int buflen, bool unescape)
Definition: token.c:492
int getword(char const **ptr, char *buf, int buflen, bool unescape)
Definition: token.c:459
const bool fr_comparison_op[T_TOKEN_LAST]
Definition: token.c:198
const bool fr_binary_op[T_TOKEN_LAST]
Definition: token.c:216
#define T(_x)
Definition: token.c:230
enum fr_token fr_token_t
@ T_AND
Definition: token.h:55
@ T_OP_SUB_EQ
Definition: token.h:70
@ T_INVALID
Definition: token.h:39
@ T_SUB
Definition: token.h:52
@ T_RSHIFT
Definition: token.h:62
@ T_RCBRACE
Definition: token.h:42
@ T_NOT
Definition: token.h:57
@ T_XOR
Definition: token.h:58
@ T_RBRACE
Definition: token.h:44
@ T_EOL
Definition: token.h:40
@ T_SEMICOLON
Definition: token.h:46
@ T_DIV
Definition: token.h:54
@ T_SINGLE_QUOTED_STRING
Definition: token.h:122
@ T_MOD
Definition: token.h:60
@ T_OP_AND_EQ
Definition: token.h:74
@ T_OP_CMP_TRUE
Definition: token.h:104
@ T_BARE_WORD
Definition: token.h:120
@ T_OP_EQ
Definition: token.h:83
@ T_LAND
Definition: token.h:91
@ T_COMPLEMENT
Definition: token.h:59
@ T_ADD
Definition: token.h:51
@ T_BACK_QUOTED_STRING
Definition: token.h:123
@ T_HASH
Definition: token.h:119
@ T_OP_SET
Definition: token.h:84
@ T_OP_NE
Definition: token.h:97
@ T_OP_ADD_EQ
Definition: token.h:69
@ T_OP_CMP_FALSE
Definition: token.h:105
@ T_OP_LSHIFT_EQ
Definition: token.h:77
@ T_LOR
Definition: token.h:92
@ T_LCBRACE
Definition: token.h:41
@ T_LSHIFT
Definition: token.h:63
@ T_OP_RSHIFT_EQ
Definition: token.h:76
@ T_OP_REG_EQ
Definition: token.h:102
@ T_OP_CMP_EQ_TYPE
Definition: token.h:107
@ T_DOUBLE_QUOTED_STRING
Definition: token.h:121
@ T_OP_CMP_EQ
Definition: token.h:106
@ T_OP_INCRM
Definition: token.h:113
@ T_LBRACE
Definition: token.h:43
@ T_MUL
Definition: token.h:53
@ T_OP_LE
Definition: token.h:100
@ T_OP_CMP_NE_TYPE
Definition: token.h:108
@ T_OP_GE
Definition: token.h:98
@ T_OP_GT
Definition: token.h:99
@ T_OP_OR_EQ
Definition: token.h:73
@ T_SOLIDUS_QUOTED_STRING
Definition: token.h:124
@ T_OP_LT
Definition: token.h:101
@ T_OP_REG_NE
Definition: token.h:103
@ T_COMMA
Definition: token.h:45
@ T_OR
Definition: token.h:56
@ T_OP_PREPEND
Definition: token.h:85
#define T_TOKEN_LAST
Definition: token.h:129
#define fr_strerror_const(_msg)
Definition: strerror.h:223