dte test coverage


Directory: ./
File: src/regexp.c
Date: 2025-07-13 15:27:15
Exec Total Coverage
Lines: 61 72 84.7%
Functions: 9 10 90.0%
Branches: 14 24 58.3%

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "util/arith.h"
5 #include "util/ascii.h"
6 #include "util/debug.h"
7 #include "util/hashmap.h"
8 #include "util/intern.h"
9 #include "util/xmalloc.h"
10 #include "util/xstring.h"
11
// File-scope table of compiled patterns. regexp_intern() inserts entries
// keyed by the interned pattern string; the HMAP_BORROWED_KEYS flag set
// here is asserted again before each insert and before the table is freed.
12 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
13 static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
14
// Format the regerror(3) description of `err` and report it through
// error_msg() in the form "<description>: <pattern>".
// Returns false immediately when `ebuf` is NULL; otherwise returns
// whatever error_msg() returns.
// NOTE(review): no execution counts are recorded for any line of this
// function in this report, so it appears to be the single function that
// the summary counts as not executed (9 of 10) -- confirm.
15 bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err)
16 {
17 if (!ebuf) {
18 return false;
19 }
20 char msg[1024];
21 regerror(err, re, msg, sizeof(msg));
22 return error_msg(ebuf, "%s: %s", msg, pattern);
23 }
24
// Compile `pattern` into `re` with REG_EXTENDED OR-ed into `flags`.
// On failure the regerror(3) text is handed to fatal_error() together
// with EINVAL.
// NOTE(review): fatal_error() presumably does not return -- confirm.
25 3 void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
26 {
27 // Note: DEFAULT_REGEX_FLAGS isn't used here because this function
28 // is only used for compiling built-in patterns, where we explicitly
29 // avoid using "enhanced" features
30 3 int err = regcomp(re, pattern, flags | REG_EXTENDED);
31
1/2
✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→6) taken 3 times.
3 if (unlikely(err)) {
// Failure path: not taken in this coverage run
32 char msg[1024];
33 regerror(err, re, msg, sizeof(msg));
34 fatal_error(msg, EINVAL);
35 }
36 3 }
37
// Run `re` against the first `size` bytes of `buf`.
// Returns true when regexec(3) returns 0 (a match), false otherwise.
// In the REG_STARTEND path `pmatch[0]` is written unconditionally, so
// `pmatch` must point to at least one regmatch_t even when `nmatch` is 0.
38 143 bool regexp_exec (
39 const regex_t *re,
40 const char *buf,
41 size_t size,
42 size_t nmatch,
43 regmatch_t *pmatch,
44 int flags
45 ) {
46 // ASan's __interceptor_regexec() doesn't support REG_STARTEND
47 #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
48 // "If REG_STARTEND is specified, pmatch must point to at least
49 // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
50 // to hold the input offsets for REG_STARTEND."
51 // -- https://man.openbsd.org/regex.3
// Input range [0, size) is passed in via pmatch[0], so `buf` is used
// in place without copying
52 143 pmatch[0].rm_so = 0;
53 143 pmatch[0].rm_eo = size;
54 143 return !regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND);
55 #else
56 // Buffer must be null-terminated if REG_STARTEND isn't supported
// NOTE(review): xstrcut() presumably returns a NUL-terminated heap copy
// of the first `size` bytes of `buf` -- confirm. It is freed below.
57 char *tmp = xstrcut(buf, size);
58 int ret = !regexec(re, tmp, nmatch, pmatch, flags);
59 free(tmp);
60 return ret;
61 #endif
62 }
63
64 // Check which word boundary tokens are supported by regcomp(3)
65 // (if any) and initialize `rwbt` with them for later use
// Returns true after copying the first working entry of `pairs` into
// `*rwbt`. Returns false, without writing to `*rwbt`, when no candidate
// both compiles and matches at the expected offsets.
66 11 bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
67 {
// Probe text: only the "foo" at offsets [20, 23) has a word boundary on
// both sides, so a correct token pair must match exactly there
68 11 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
69 11 const regoff_t match_start = 20, match_end = 23;
// Candidate {start token, end token, token length} triples, tried in order
70 11 static const RegexpWordBoundaryTokens pairs[] = {
71 {"\\<", "\\>", 2},
72 {"[[:<:]]", "[[:>:]]", 7},
73 {"\\b", "\\b", 2},
74 };
75
// Sanity-check the hard-coded offsets against the contents of `text`
76 11 BUG_ON(ARRAYLEN(text) <= match_end);
77 11 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
78
79
1/2
✓ Branch 0 (17→5) taken 11 times.
✗ Branch 1 (17→18) not taken.
11 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
80 11 const RegexpWordBoundaryTokens *p = &pairs[i];
// The longest candidate needs 7 + 5 + 7 + 1 = 20 bytes, so 32 suffices
81 11 char patt[32];
// Builds start + "(foo)" + end.
// NOTE(review): the `+ 1` on the final length presumably copies the NUL
// terminator of p->end -- confirm against xmempcpy3().
82 11 xmempcpy3(patt, p->start, p->len, STRN("(foo)"), p->end, p->len + 1);
83 11 regex_t re;
84
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→9) taken 11 times.
11 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
// Pattern failed to compile; move on to the next candidate
85 continue;
86 }
87 11 regmatch_t m[2];
88 11 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
// `re` is no longer needed once the match offsets are stored in `m`
89 11 regfree(&re);
90
3/6
✓ Branch 0 (11→12) taken 11 times.
✗ Branch 1 (11→15) not taken.
✓ Branch 2 (12→13) taken 11 times.
✗ Branch 3 (12→15) not taken.
✓ Branch 4 (13→14) taken 11 times.
✗ Branch 5 (13→15) not taken.
11 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
91 11 *rwbt = pairs[i];
92 11 return true;
93 }
94 }
95
// No candidate worked; not reached in this coverage run
96 return false;
97 }
98
// Copy the `plen` bytes of `pat` into `buf`, writing a backslash before
// every byte for which is_regex_special_char() is true, then append a
// NUL terminator.
// Returns the number of bytes written, not counting the terminator.
// `buflen` must be at least 2 * plen + 1 (every byte escaped, plus the
// terminator); this is enforced with BUG_ON().
99 1 size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
100 {
101 1 BUG_ON(buflen < (2 * plen) + 1);
// `o` is the write offset into `buf`
102 size_t o = 0;
103
2/2
✓ Branch 0 (7→4) taken 27 times.
✓ Branch 1 (7→8) taken 1 times.
28 for (size_t i = 0; i < plen; i++) {
104 27 char ch = pat[i];
105
2/2
✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 12 times.
27 if (is_regex_special_char(ch)) {
106 15 buf[o++] = '\\';
107 }
108 27 buf[o++] = ch;
109 }
110 1 buf[o] = '\0';
111 1 return o;
112 }
113
// Allocating wrapper around regexp_escapeb(): sizes a buffer for the
// worst case (2 * len + 1 bytes), fills it with the escaped copy of
// `pattern` and returns it.
// NOTE(review): xmul() presumably guards the multiplication against
// overflow, and the caller presumably owns (and must free) the returned
// buffer -- confirm against the helpers and call sites.
114 1 char *regexp_escape(const char *pattern, size_t len)
115 {
116 1 size_t buflen = xmul(2, len) + 1;
117 1 char *buf = xmalloc(buflen);
118 1 regexp_escapeb(buf, buflen, pattern, len);
119 1 return buf;
120 }
121
// Return the InternedRegexp for `pattern`, compiling and storing it in
// `interned_regexps` on first use.
// Returns NULL for an empty pattern (no message is reported) and NULL
// after reporting through regexp_error_msg() when regcomp(3) fails.
122 33 const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern)
123 {
124
1/2
✓ Branch 0 (2→3) taken 33 times.
✗ Branch 1 (2→14) not taken.
33 if (pattern[0] == '\0') {
125 return NULL;
126 }
127
// Reuse a previously compiled entry when one exists
128 33 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
129
2/2
✓ Branch 0 (4→5) taken 32 times.
✓ Branch 1 (4→14) taken 1 times.
33 if (ir) {
130 return ir;
131 }
132
133 32 ir = xmalloc(sizeof(*ir));
134 32 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
135
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→10) taken 32 times.
32 if (unlikely(err)) {
// Failure path (not taken in this coverage run): nothing was inserted
// into the table, so only the allocation has to be released
136 regexp_error_msg(ebuf, &ir->re, pattern, err);
137 free(ir);
138 return NULL;
139 }
140
// The key is the str_intern()'d string rather than the caller's
// `pattern`, and the same pointer is stored in ir->str.
// NOTE(review): HMAP_BORROWED_KEYS presumably tells the map not to free
// keys, which it does not own -- confirm against the hashmap code.
141 32 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
142 32 const char *str = str_intern(pattern);
143 32 ir->str = str;
144 32 return hashmap_insert(&interned_regexps, (char*)str, ir);
145 }
146
// Return true when hashmap_find() returns a non-NULL result for `pattern`
// in `interned_regexps`, false otherwise. Nothing is compiled or inserted.
147 58 bool regexp_is_interned(const char *pattern)
148 {
149 58 return !!hashmap_find(&interned_regexps, pattern);
150 }
151
// Release one table entry: free the compiled regex with regfree(3), then
// free the InternedRegexp itself. `ir->str` is not freed here.
152 32 static void free_interned_regexp(InternedRegexp *ir)
153 {
154 32 regfree(&ir->re);
155 32 free(ir);
156 32 }
157
// Tear down `interned_regexps` by passing free_interned_regexp() to
// hashmap_free() as the value destructor. The BUG_ON() asserts that the
// table still carries the HMAP_BORROWED_KEYS flag beforehand.
158 11 void free_interned_regexps(void)
159 {
160 11 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
161 11 hashmap_free(&interned_regexps, FREE_FUNC(free_interned_regexp));
162 11 }
163