Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <errno.h> | ||
2 | #include <stdlib.h> | ||
3 | #include "regexp.h" | ||
4 | #include "error.h" | ||
5 | #include "util/ascii.h" | ||
6 | #include "util/debug.h" | ||
7 | #include "util/hashmap.h" | ||
8 | #include "util/xmalloc.h" | ||
9 | #include "util/xsnprintf.h" | ||
10 | #include "util/xstring.h" | ||
11 | |||
12 | // NOLINTNEXTLINE(*-avoid-non-const-global-variables) | ||
13 | static HashMap interned_regexps; | ||
14 | |||
15 | ✗ | bool regexp_error_msg(const regex_t *re, const char *pattern, int err) | |
16 | { | ||
17 | ✗ | char msg[1024]; | |
18 | ✗ | regerror(err, re, msg, sizeof(msg)); | |
19 | ✗ | return error_msg("%s: %s", msg, pattern); | |
20 | } | ||
21 | |||
22 | 213 | bool regexp_compile_internal(regex_t *re, const char *pattern, int flags) | |
23 | { | ||
24 | 213 | int err = regcomp(re, pattern, flags); | |
25 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 213 times.
|
213 | if (err) { |
26 | ✗ | return regexp_error_msg(re, pattern, err); | |
27 | } | ||
28 | return true; | ||
29 | } | ||
30 | |||
31 | 2 | void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags) | |
32 | { | ||
33 | // Note: DEFAULT_REGEX_FLAGS isn't used here because this function | ||
34 | // is only used for compiling built-in patterns, where we explicitly | ||
35 | // avoid using "enhanced" features | ||
36 | 2 | int err = regcomp(re, pattern, flags | REG_EXTENDED); | |
37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (unlikely(err)) { |
38 | ✗ | char msg[1024]; | |
39 | ✗ | regerror(err, re, msg, sizeof(msg)); | |
40 | − | fatal_error(msg, EINVAL); | |
41 | } | ||
42 | 2 | } | |
43 | |||
44 | 92 | bool regexp_exec ( | |
45 | const regex_t *re, | ||
46 | const char *buf, | ||
47 | size_t size, | ||
48 | size_t nmatch, | ||
49 | regmatch_t *pmatch, | ||
50 | int flags | ||
51 | ) { | ||
52 | // "If REG_STARTEND is specified, pmatch must point to at least one | ||
53 | // regmatch_t (even if nmatch is 0 or REG_NOSUB was specified), to | ||
54 | // hold the input offsets for REG_STARTEND." | ||
55 | // -- https://man.openbsd.org/regex.3 | ||
56 | 92 | BUG_ON(!pmatch); | |
57 | |||
58 | // ASan's __interceptor_regexec() doesn't support REG_STARTEND | ||
59 | #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0 | ||
60 | 92 | pmatch[0].rm_so = 0; | |
61 | 92 | pmatch[0].rm_eo = size; | |
62 | 92 | return !regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND); | |
63 | #else | ||
64 | // Buffer must be null-terminated if REG_STARTEND isn't supported | ||
65 | char *tmp = xstrcut(buf, size); | ||
66 | int ret = !regexec(re, tmp, nmatch, pmatch, flags); | ||
67 | free(tmp); | ||
68 | return ret; | ||
69 | #endif | ||
70 | } | ||
71 | |||
72 | // Check which word boundary tokens are supported by regcomp(3) | ||
73 | // (if any) and initialize `rwbt` with them for later use | ||
74 | 8 | bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt) | |
75 | { | ||
76 | 8 | static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE"; | |
77 | 8 | const regoff_t match_start = 20, match_end = 23; | |
78 | 8 | static const RegexpWordBoundaryTokens pairs[] = { | |
79 | {"\\<", "\\>"}, | ||
80 | {"[[:<:]]", "[[:>:]]"}, | ||
81 | {"\\b", "\\b"}, | ||
82 | }; | ||
83 | |||
84 | 8 | BUG_ON(ARRAYLEN(text) <= match_end); | |
85 | 8 | BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5)); | |
86 | |||
87 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | for (size_t i = 0; i < ARRAYLEN(pairs); i++) { |
88 | 8 | const char *start = pairs[i].start; | |
89 | 8 | const char *end = pairs[i].end; | |
90 | 8 | char patt[32]; | |
91 | 8 | xsnprintf(patt, sizeof(patt), "%s(foo)%s", start, end); | |
92 | 8 | regex_t re; | |
93 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) { |
94 | ✗ | continue; | |
95 | } | ||
96 | 8 | regmatch_t m[2]; | |
97 | 8 | bool match = !regexec(&re, text, ARRAYLEN(m), m, 0); | |
98 | 8 | regfree(&re); | |
99 |
3/6✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
|
8 | if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) { |
100 | 8 | *rwbt = pairs[i]; | |
101 | 8 | return true; | |
102 | } | ||
103 | } | ||
104 | |||
105 | return false; | ||
106 | } | ||
107 | |||
108 | 1 | size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen) | |
109 | { | ||
110 | 1 | BUG_ON(buflen < (2 * plen) + 1); | |
111 | size_t o = 0; | ||
112 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 1 times.
|
28 | for (size_t i = 0; i < plen; i++) { |
113 | 27 | char ch = pat[i]; | |
114 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 12 times.
|
27 | if (is_regex_special_char(ch)) { |
115 | 15 | buf[o++] = '\\'; | |
116 | } | ||
117 | 27 | buf[o++] = ch; | |
118 | } | ||
119 | 1 | buf[o] = '\0'; | |
120 | 1 | return o; | |
121 | } | ||
122 | |||
123 | 1 | char *regexp_escape(const char *pattern, size_t len) | |
124 | { | ||
125 | 1 | size_t buflen = xmul(2, len) + 1; | |
126 | 1 | char *buf = xmalloc(buflen); | |
127 | 1 | regexp_escapeb(buf, buflen, pattern, len); | |
128 | 1 | return buf; | |
129 | } | ||
130 | |||
131 | 23 | const InternedRegexp *regexp_intern(const char *pattern) | |
132 | { | ||
133 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | if (pattern[0] == '\0') { |
134 | return NULL; | ||
135 | } | ||
136 | |||
137 | 23 | InternedRegexp *ir = hashmap_get(&interned_regexps, pattern); | |
138 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | if (ir) { |
139 | return ir; | ||
140 | } | ||
141 | |||
142 | 23 | ir = xnew(InternedRegexp, 1); | |
143 | 23 | int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB); | |
144 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (unlikely(err)) { |
145 | ✗ | regexp_error_msg(&ir->re, pattern, err); | |
146 | ✗ | free(ir); | |
147 | ✗ | return NULL; | |
148 | } | ||
149 | |||
150 | 23 | char *str = xstrdup(pattern); | |
151 | 23 | ir->str = str; | |
152 | 23 | return hashmap_insert(&interned_regexps, str, ir); | |
153 | } | ||
154 | |||
155 | 38 | bool regexp_is_interned(const char *pattern) | |
156 | { | ||
157 | 38 | return !!hashmap_find(&interned_regexps, pattern); | |
158 | } | ||
159 | |||
160 | // Note: this does NOT free InternedRegexp::str, because it points at the | ||
161 | // same string as HashMapEntry::key and is already freed by hashmap_free() | ||
162 | 23 | static void free_interned_regexp(InternedRegexp *ir) | |
163 | { | ||
164 | 23 | regfree(&ir->re); | |
165 | 23 | free(ir); | |
166 | 23 | } | |
167 | |||
168 | 8 | void free_interned_regexps(void) | |
169 | { | ||
170 | 8 | hashmap_free(&interned_regexps, (FreeFunction)free_interned_regexp); | |
171 | 8 | } | |
172 |