dte test coverage


Directory: ./
File: src/regexp.c
Date: 2025-05-08 15:05:54
Exec Total Coverage
Lines: 62 73 84.9%
Functions: 9 10 90.0%
Branches: 14 24 58.3%

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "util/ascii.h"
5 #include "util/debug.h"
6 #include "util/hashmap.h"
7 #include "util/intern.h"
8 #include "util/xmalloc.h"
9 #include "util/xsnprintf.h"
10 #include "util/xstring.h"
11
12 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
13 static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
14
15 bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err)
16 {
17 if (!ebuf) {
18 return false;
19 }
20 char msg[1024];
21 regerror(err, re, msg, sizeof(msg));
22 return error_msg(ebuf, "%s: %s", msg, pattern);
23 }
24
// Compile `pattern` into `re` with `flags | REG_EXTENDED`. If regcomp(3)
// fails, the regerror(3) description is handed to fatal_error() together
// with EINVAL.
void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
{
    // Note: DEFAULT_REGEX_FLAGS is intentionally left out, since this
    // function only compiles built-in patterns, which explicitly avoid
    // "enhanced" features
    const int cflags = flags | REG_EXTENDED;
    const int rc = regcomp(re, pattern, cflags);

    if (unlikely(rc != 0)) {
        char description[1024];
        regerror(rc, re, description, sizeof(description));
        fatal_error(description, EINVAL);
    }
}
37
// Run regexec(3) with `re` against the first `size` bytes of `buf` and
// return true if it reports a match (i.e. returns 0).
//
// `nmatch`, `pmatch` and `flags` are forwarded to regexec(3). `pmatch`
// may be NULL when no match offsets are wanted; `nmatch` is then treated
// as 0, so that both build configurations below behave the same way.
bool regexp_exec (
    const regex_t *re,
    const char *buf,
    size_t size,
    size_t nmatch,
    regmatch_t *pmatch,
    int flags
) {
    if (!pmatch) {
        // There's nowhere to store offsets, so don't ask for any
        nmatch = 0;
    }

    // ASan's __interceptor_regexec() doesn't support REG_STARTEND
#if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
    // "If REG_STARTEND is specified, pmatch must point to at least
    // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
    // to hold the input offsets for REG_STARTEND."
    // -- https://man.openbsd.org/regex.3
    regmatch_t startend;
    if (!pmatch) {
        // Use local scratch space for the input offsets, instead of
        // dereferencing a NULL pointer
        pmatch = &startend;
    }
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = size;
    return regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND) == 0;
#else
    // Buffer must be null-terminated if REG_STARTEND isn't supported
    char *tmp = xstrcut(buf, size);
    bool matched = regexec(re, tmp, nmatch, pmatch, flags) == 0;
    free(tmp);
    return matched;
#endif
}
63
64 // Check which word boundary tokens are supported by regcomp(3)
65 // (if any) and initialize `rwbt` with them for later use
66 9 bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
67 {
68 9 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
69 9 const regoff_t match_start = 20, match_end = 23;
70 9 static const RegexpWordBoundaryTokens pairs[] = {
71 {"\\<", "\\>"},
72 {"[[:<:]]", "[[:>:]]"},
73 {"\\b", "\\b"},
74 };
75
76 9 BUG_ON(ARRAYLEN(text) <= match_end);
77 9 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
78
79
1/2
✓ Branch 0 (17→5) taken 9 times.
✗ Branch 1 (17→18) not taken.
9 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
80 9 const char *start = pairs[i].start;
81 9 const char *end = pairs[i].end;
82 9 char patt[32];
83 9 xsnprintf(patt, sizeof(patt), "%s(foo)%s", start, end);
84 9 regex_t re;
85
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→9) taken 9 times.
9 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
86 continue;
87 }
88 9 regmatch_t m[2];
89 9 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
90 9 regfree(&re);
91
3/6
✓ Branch 0 (11→12) taken 9 times.
✗ Branch 1 (11→15) not taken.
✓ Branch 2 (12→13) taken 9 times.
✗ Branch 3 (12→15) not taken.
✓ Branch 4 (13→14) taken 9 times.
✗ Branch 5 (13→15) not taken.
9 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
92 9 *rwbt = pairs[i];
93 9 return true;
94 }
95 }
96
97 return false;
98 }
99
// Copy the `plen` bytes of `pat` into `buf`, inserting a backslash before
// every byte for which is_regex_special_char() is true, then append a
// null terminator. Returns the number of bytes written, not counting the
// terminator.
//
// `buflen` must be at least (2 * plen) + 1, which covers the worst case
// where every byte needs escaping.
size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
{
    // Equivalent to `buflen < (2 * plen) + 1`, but expressed without a
    // multiplication so that a huge `plen` can't wrap around and slip
    // past the check
    BUG_ON(buflen == 0 || plen > (buflen - 1) / 2);

    size_t o = 0;
    for (size_t i = 0; i < plen; i++) {
        char ch = pat[i];
        if (is_regex_special_char(ch)) {
            buf[o++] = '\\';
        }
        buf[o++] = ch;
    }

    buf[o] = '\0';
    return o;
}
114
// Return a newly allocated (via xmalloc()), null-terminated copy of the
// first `len` bytes of `pattern`, escaped by regexp_escapeb(). The buffer
// is sized for the worst case of every byte being escaped.
char *regexp_escape(const char *pattern, size_t len)
{
    const size_t capacity = xmul(2, len) + 1;
    char *escaped = xmalloc(capacity);
    (void)regexp_escapeb(escaped, capacity, pattern, len);
    return escaped;
}
122
123 27 const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern)
124 {
125
1/2
✓ Branch 0 (2→3) taken 27 times.
✗ Branch 1 (2→14) not taken.
27 if (pattern[0] == '\0') {
126 return NULL;
127 }
128
129 27 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
130
2/2
✓ Branch 0 (4→5) taken 26 times.
✓ Branch 1 (4→14) taken 1 times.
27 if (ir) {
131 return ir;
132 }
133
134 26 ir = xmalloc(sizeof(*ir));
135 26 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
136
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→10) taken 26 times.
26 if (unlikely(err)) {
137 regexp_error_msg(ebuf, &ir->re, pattern, err);
138 free(ir);
139 return NULL;
140 }
141
142 26 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
143 26 const char *str = str_intern(pattern);
144 26 ir->str = str;
145 26 return hashmap_insert(&interned_regexps, (char*)str, ir);
146 }
147
148 46 bool regexp_is_interned(const char *pattern)
149 {
150 46 return !!hashmap_find(&interned_regexps, pattern);
151 }
152
153 26 static void free_interned_regexp(InternedRegexp *ir)
154 {
155 26 regfree(&ir->re);
156 26 free(ir);
157 26 }
158
159 9 void free_interned_regexps(void)
160 {
161 9 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
162 9 hashmap_free(&interned_regexps, (FreeFunction)free_interned_regexp);
163 9 }
164