dte test coverage


Directory: ./
File: src/regexp.c
Date: 2024-12-21 16:03:22
Exec Total Coverage
Lines: 64 75 85.3%
Functions: 10 11 90.9%
Branches: 14 24 58.3%

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "error.h"
5 #include "util/ascii.h"
6 #include "util/debug.h"
7 #include "util/hashmap.h"
8 #include "util/xmalloc.h"
9 #include "util/xsnprintf.h"
10 #include "util/xstring.h"
11
12 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
13 static HashMap interned_regexps;
14
15 bool regexp_error_msg(const regex_t *re, const char *pattern, int err)
16 {
17 char msg[1024];
18 regerror(err, re, msg, sizeof(msg));
19 return error_msg("%s: %s", msg, pattern);
20 }
21
22 213 bool regexp_compile_internal(regex_t *re, const char *pattern, int flags)
23 {
24 213 int err = regcomp(re, pattern, flags);
25
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 213 times.
213 if (err) {
26 return regexp_error_msg(re, pattern, err);
27 }
28 return true;
29 }
30
31 2 void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
32 {
33 // Note: DEFAULT_REGEX_FLAGS isn't used here because this function
34 // is only used for compiling built-in patterns, where we explicitly
35 // avoid using "enhanced" features
36 2 int err = regcomp(re, pattern, flags | REG_EXTENDED);
37
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (unlikely(err)) {
38 char msg[1024];
39 regerror(err, re, msg, sizeof(msg));
40 fatal_error(msg, EINVAL);
41 }
42 2 }
43
44 92 bool regexp_exec (
45 const regex_t *re,
46 const char *buf,
47 size_t size,
48 size_t nmatch,
49 regmatch_t *pmatch,
50 int flags
51 ) {
52 // "If REG_STARTEND is specified, pmatch must point to at least one
53 // regmatch_t (even if nmatch is 0 or REG_NOSUB was specified), to
54 // hold the input offsets for REG_STARTEND."
55 // -- https://man.openbsd.org/regex.3
56 92 BUG_ON(!pmatch);
57
58 // ASan's __interceptor_regexec() doesn't support REG_STARTEND
59 #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
60 92 pmatch[0].rm_so = 0;
61 92 pmatch[0].rm_eo = size;
62 92 return !regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND);
63 #else
64 // Buffer must be null-terminated if REG_STARTEND isn't supported
65 char *tmp = xstrcut(buf, size);
66 int ret = !regexec(re, tmp, nmatch, pmatch, flags);
67 free(tmp);
68 return ret;
69 #endif
70 }
71
72 // Check which word boundary tokens are supported by regcomp(3)
73 // (if any) and initialize `rwbt` with them for later use
74 8 bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
75 {
76 8 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
77 8 const regoff_t match_start = 20, match_end = 23;
78 8 static const RegexpWordBoundaryTokens pairs[] = {
79 {"\\<", "\\>"},
80 {"[[:<:]]", "[[:>:]]"},
81 {"\\b", "\\b"},
82 };
83
84 8 BUG_ON(ARRAYLEN(text) <= match_end);
85 8 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
86
87
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
88 8 const char *start = pairs[i].start;
89 8 const char *end = pairs[i].end;
90 8 char patt[32];
91 8 xsnprintf(patt, sizeof(patt), "%s(foo)%s", start, end);
92 8 regex_t re;
93
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
94 continue;
95 }
96 8 regmatch_t m[2];
97 8 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
98 8 regfree(&re);
99
3/6
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
8 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
100 8 *rwbt = pairs[i];
101 8 return true;
102 }
103 }
104
105 return false;
106 }
107
108 1 size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
109 {
110 1 BUG_ON(buflen < (2 * plen) + 1);
111 size_t o = 0;
112
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 1 times.
28 for (size_t i = 0; i < plen; i++) {
113 27 char ch = pat[i];
114
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 12 times.
27 if (is_regex_special_char(ch)) {
115 15 buf[o++] = '\\';
116 }
117 27 buf[o++] = ch;
118 }
119 1 buf[o] = '\0';
120 1 return o;
121 }
122
123 1 char *regexp_escape(const char *pattern, size_t len)
124 {
125 1 size_t buflen = xmul(2, len) + 1;
126 1 char *buf = xmalloc(buflen);
127 1 regexp_escapeb(buf, buflen, pattern, len);
128 1 return buf;
129 }
130
131 23 const InternedRegexp *regexp_intern(const char *pattern)
132 {
133
1/2
✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
23 if (pattern[0] == '\0') {
134 return NULL;
135 }
136
137 23 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
138
1/2
✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
23 if (ir) {
139 return ir;
140 }
141
142 23 ir = xnew(InternedRegexp, 1);
143 23 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
144
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
23 if (unlikely(err)) {
145 regexp_error_msg(&ir->re, pattern, err);
146 free(ir);
147 return NULL;
148 }
149
150 23 char *str = xstrdup(pattern);
151 23 ir->str = str;
152 23 return hashmap_insert(&interned_regexps, str, ir);
153 }
154
155 38 bool regexp_is_interned(const char *pattern)
156 {
157 38 return !!hashmap_find(&interned_regexps, pattern);
158 }
159
160 // Note: this does NOT free InternedRegexp::str, because it points at the
161 // same string as HashMapEntry::key and is already freed by hashmap_free()
162 23 static void free_interned_regexp(InternedRegexp *ir)
163 {
164 23 regfree(&ir->re);
165 23 free(ir);
166 23 }
167
168 8 void free_interned_regexps(void)
169 {
170 8 hashmap_free(&interned_regexps, (FreeFunction)free_interned_regexp);
171 8 }
172