dte test coverage


Directory: ./
File: src/regexp.c
Date: 2025-07-03 15:44:24
Exec Total Coverage
Lines: 61 72 84.7%
Functions: 9 10 90.0%
Branches: 14 24 58.3%

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "util/arith.h"
5 #include "util/ascii.h"
6 #include "util/debug.h"
7 #include "util/hashmap.h"
8 #include "util/intern.h"
9 #include "util/xmalloc.h"
10 #include "util/xsnprintf.h"
11 #include "util/xstring.h"
12
13 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
14 static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
15
16 bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err)
17 {
18 if (!ebuf) {
19 return false;
20 }
21 char msg[1024];
22 regerror(err, re, msg, sizeof(msg));
23 return error_msg(ebuf, "%s: %s", msg, pattern);
24 }
25
26 3 void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
27 {
28 // Note: DEFAULT_REGEX_FLAGS isn't used here because this function
29 // is only used for compiling built-in patterns, where we explicitly
30 // avoid using "enhanced" features
31 3 int err = regcomp(re, pattern, flags | REG_EXTENDED);
32
1/2
✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→6) taken 3 times.
3 if (unlikely(err)) {
33 char msg[1024];
34 regerror(err, re, msg, sizeof(msg));
35 fatal_error(msg, EINVAL);
36 }
37 3 }
38
39 143 bool regexp_exec (
40 const regex_t *re,
41 const char *buf,
42 size_t size,
43 size_t nmatch,
44 regmatch_t *pmatch,
45 int flags
46 ) {
47 // ASan's __interceptor_regexec() doesn't support REG_STARTEND
48 #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
49 // "If REG_STARTEND is specified, pmatch must point to at least
50 // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
51 // to hold the input offsets for REG_STARTEND."
52 // -- https://man.openbsd.org/regex.3
53 143 pmatch[0].rm_so = 0;
54 143 pmatch[0].rm_eo = size;
55 143 return !regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND);
56 #else
57 // Buffer must be null-terminated if REG_STARTEND isn't supported
58 char *tmp = xstrcut(buf, size);
59 int ret = !regexec(re, tmp, nmatch, pmatch, flags);
60 free(tmp);
61 return ret;
62 #endif
63 }
64
65 // Check which word boundary tokens are supported by regcomp(3)
66 // (if any) and initialize `rwbt` with them for later use
67 11 bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
68 {
69 11 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
70 11 const regoff_t match_start = 20, match_end = 23;
71 11 static const RegexpWordBoundaryTokens pairs[] = {
72 {"\\<", "\\>", 2},
73 {"[[:<:]]", "[[:>:]]", 7},
74 {"\\b", "\\b", 2},
75 };
76
77 11 BUG_ON(ARRAYLEN(text) <= match_end);
78 11 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
79
80
1/2
✓ Branch 0 (17→5) taken 11 times.
✗ Branch 1 (17→18) not taken.
11 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
81 11 const RegexpWordBoundaryTokens *p = &pairs[i];
82 11 char patt[32];
83 11 xmempcpy3(patt, p->start, p->len, STRN("(foo)"), p->end, p->len + 1);
84 11 regex_t re;
85
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→9) taken 11 times.
11 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
86 continue;
87 }
88 11 regmatch_t m[2];
89 11 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
90 11 regfree(&re);
91
3/6
✓ Branch 0 (11→12) taken 11 times.
✗ Branch 1 (11→15) not taken.
✓ Branch 2 (12→13) taken 11 times.
✗ Branch 3 (12→15) not taken.
✓ Branch 4 (13→14) taken 11 times.
✗ Branch 5 (13→15) not taken.
11 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
92 11 *rwbt = pairs[i];
93 11 return true;
94 }
95 }
96
97 return false;
98 }
99
100 1 size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
101 {
102 1 BUG_ON(buflen < (2 * plen) + 1);
103 size_t o = 0;
104
2/2
✓ Branch 0 (7→4) taken 27 times.
✓ Branch 1 (7→8) taken 1 times.
28 for (size_t i = 0; i < plen; i++) {
105 27 char ch = pat[i];
106
2/2
✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 12 times.
27 if (is_regex_special_char(ch)) {
107 15 buf[o++] = '\\';
108 }
109 27 buf[o++] = ch;
110 }
111 1 buf[o] = '\0';
112 1 return o;
113 }
114
115 1 char *regexp_escape(const char *pattern, size_t len)
116 {
117 1 size_t buflen = xmul(2, len) + 1;
118 1 char *buf = xmalloc(buflen);
119 1 regexp_escapeb(buf, buflen, pattern, len);
120 1 return buf;
121 }
122
123 33 const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern)
124 {
125
1/2
✓ Branch 0 (2→3) taken 33 times.
✗ Branch 1 (2→14) not taken.
33 if (pattern[0] == '\0') {
126 return NULL;
127 }
128
129 33 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
130
2/2
✓ Branch 0 (4→5) taken 32 times.
✓ Branch 1 (4→14) taken 1 times.
33 if (ir) {
131 return ir;
132 }
133
134 32 ir = xmalloc(sizeof(*ir));
135 32 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
136
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→10) taken 32 times.
32 if (unlikely(err)) {
137 regexp_error_msg(ebuf, &ir->re, pattern, err);
138 free(ir);
139 return NULL;
140 }
141
142 32 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
143 32 const char *str = str_intern(pattern);
144 32 ir->str = str;
145 32 return hashmap_insert(&interned_regexps, (char*)str, ir);
146 }
147
148 58 bool regexp_is_interned(const char *pattern)
149 {
150 58 return !!hashmap_find(&interned_regexps, pattern);
151 }
152
153 32 static void free_interned_regexp(InternedRegexp *ir)
154 {
155 32 regfree(&ir->re);
156 32 free(ir);
157 32 }
158
159 11 void free_interned_regexps(void)
160 {
161 11 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
162 11 hashmap_free(&interned_regexps, FREE_FUNC(free_interned_regexp));
163 11 }
164