dte test coverage


Directory: ./
File: src/regexp.c
Date: 2025-09-07 23:01:39
Exec Total Coverage
Lines: 64 73 87.7%
Functions: 9 10 90.0%
Branches: 16 26 61.5%

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "util/arith.h"
5 #include "util/ascii.h"
6 #include "util/debug.h"
7 #include "util/hashmap.h"
8 #include "util/intern.h"
9 #include "util/xmalloc.h"
10 #include "util/xstring.h"
11
12 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
13 static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
14
15 bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err)
16 {
17 if (!ebuf) {
18 return false;
19 }
20 char msg[1024];
21 regerror(err, re, msg, sizeof(msg));
22 return error_msg(ebuf, "%s: %s", msg, pattern);
23 }
24
25 3 const regex_t *regexp_compile_or_fatal_error(const char *pattern)
26 {
27 3 const InternedRegexp *ir = regexp_intern(NULL, pattern);
28
1/2
✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→5) taken 3 times.
3 FATAL_ERROR_ON(!ir, EINVAL);
29 3 return &ir->re;
30 }
31
32 143 bool regexp_exec (
33 const regex_t *re,
34 const char *text,
35 size_t text_len,
36 size_t nmatch,
37 regmatch_t *pmatch,
38 int flags
39 ) {
40 143 BUG_ON(nmatch && !pmatch);
41
42 // ASan's __interceptor_regexec() doesn't support REG_STARTEND
43 #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
44 // "If REG_STARTEND is specified, pmatch must point to at least
45 // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
46 // to hold the input offsets for REG_STARTEND."
47 // -- https://man.openbsd.org/regex.3
48 143 regmatch_t tmp_startend;
49
2/2
✓ Branch 0 (4→5) taken 53 times.
✓ Branch 1 (4→6) taken 90 times.
143 pmatch = nmatch ? pmatch : &tmp_startend;
50 143 pmatch[0].rm_so = 0;
51 143 pmatch[0].rm_eo = text_len;
52 143 return !regexec(re, text, nmatch, pmatch, flags | REG_STARTEND);
53 #endif
54
55 // Buffer must be null-terminated if REG_STARTEND isn't supported
56 char *cstr = xstrcut(text, text_len);
57 int ret = !regexec(re, cstr, nmatch, pmatch, flags);
58 free(cstr);
59 return ret;
60 }
61
62 // Check which word boundary tokens are supported by regcomp(3)
63 // (if any) and initialize `rwbt` with them for later use
64 11 bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
65 {
66 11 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
67 11 const regoff_t match_start = 20, match_end = 23;
68 11 static const RegexpWordBoundaryTokens pairs[] = {
69 {"\\<", "\\>", 2},
70 {"[[:<:]]", "[[:>:]]", 7},
71 {"\\b", "\\b", 2},
72 };
73
74 11 BUG_ON(ARRAYLEN(text) <= match_end);
75 11 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
76
77
1/2
✓ Branch 0 (17→5) taken 11 times.
✗ Branch 1 (17→18) not taken.
11 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
78 11 const RegexpWordBoundaryTokens *p = &pairs[i];
79 11 char patt[32];
80 11 xmempcpy3(patt, p->start, p->len, STRN("(foo)"), p->end, p->len + 1);
81 11 regex_t re;
82
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→9) taken 11 times.
11 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
83 continue;
84 }
85 11 regmatch_t m[2];
86 11 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
87 11 regfree(&re);
88
3/6
✓ Branch 0 (11→12) taken 11 times.
✗ Branch 1 (11→15) not taken.
✓ Branch 2 (12→13) taken 11 times.
✗ Branch 3 (12→15) not taken.
✓ Branch 4 (13→14) taken 11 times.
✗ Branch 5 (13→15) not taken.
11 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
89 11 *rwbt = pairs[i];
90 11 return true;
91 }
92 }
93
94 return false;
95 }
96
97 1 size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
98 {
99 1 BUG_ON(buflen < (2 * plen) + 1);
100 size_t o = 0;
101
2/2
✓ Branch 0 (7→4) taken 27 times.
✓ Branch 1 (7→8) taken 1 times.
28 for (size_t i = 0; i < plen; i++) {
102 27 char ch = pat[i];
103
2/2
✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 12 times.
27 if (is_regex_special_char(ch)) {
104 15 buf[o++] = '\\';
105 }
106 27 buf[o++] = ch;
107 }
108 1 buf[o] = '\0';
109 1 return o;
110 }
111
112 1 char *regexp_escape(const char *pattern, size_t len)
113 {
114 1 size_t buflen = xmul(2, len) + 1;
115 1 char *buf = xmalloc(buflen);
116 1 regexp_escapeb(buf, buflen, pattern, len);
117 1 return buf;
118 }
119
120 36 const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern)
121 {
122
1/2
✓ Branch 0 (2→3) taken 36 times.
✗ Branch 1 (2→14) not taken.
36 if (pattern[0] == '\0') {
123 return NULL;
124 }
125
126 36 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
127
2/2
✓ Branch 0 (4→5) taken 35 times.
✓ Branch 1 (4→14) taken 1 times.
36 if (ir) {
128 return ir;
129 }
130
131 35 ir = xmalloc(sizeof(*ir));
132 35 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
133
1/2
✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→10) taken 35 times.
35 if (unlikely(err)) {
134 regexp_error_msg(ebuf, &ir->re, pattern, err);
135 free(ir);
136 return NULL;
137 }
138
139 35 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
140 35 const char *str = str_intern(pattern);
141 35 ir->str = str;
142 35 return hashmap_insert(&interned_regexps, (char*)str, ir);
143 }
144
145 58 bool regexp_is_interned(const char *pattern)
146 {
147 58 return !!hashmap_find(&interned_regexps, pattern);
148 }
149
150 35 static void free_interned_regexp(InternedRegexp *ir)
151 {
152 35 regfree(&ir->re);
153 35 free(ir);
154 35 }
155
156 11 void free_interned_regexps(void)
157 {
158 11 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
159 11 hashmap_free(&interned_regexps, FREE_FUNC(free_interned_regexp));
160 11 }
161