dte test coverage


Directory: ./
File: src/regexp.c
Date: 2026-01-09 16:07:09
Coverage Exec Excl Total
Lines: 86.5% 64 0 74
Functions: 90.0% 9 0 10
Branches: 61.5% 16 0 26

Line Branch Exec Source
1 #include <errno.h>
2 #include <stdlib.h>
3 #include "regexp.h"
4 #include "util/arith.h"
5 #include "util/ascii.h"
6 #include "util/debug.h"
7 #include "util/hashmap.h"
8 #include "util/intern.h"
9 #include "util/xmalloc.h"
10 #include "util/xstring.h"
11
12 // NOLINTNEXTLINE(*-avoid-non-const-global-variables)
13 static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
14
15 bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err)
16 {
17 if (!ebuf) {
18 return false;
19 }
20 char msg[1024];
21 regerror(err, re, msg, sizeof(msg));
22 return error_msg(ebuf, "%s: %s", msg, pattern);
23 }
24
25 3 const regex_t *regexp_compile_or_fatal_error(const char *pattern)
26 {
27 3 const InternedRegexp *ir = regexp_intern(NULL, pattern);
28
1/2
✗ Branch 3 → 4 not taken.
✓ Branch 3 → 5 taken 3 times.
3 FATAL_ERROR_ON(!ir, EINVAL);
29 3 return &ir->re;
30 }
31
32 145 bool regexp_exec (
33 const regex_t *re,
34 const char *text,
35 size_t text_len,
36 size_t nmatch,
37 regmatch_t *pmatch,
38 int flags
39 ) {
40 145 BUG_ON(nmatch && !pmatch);
41
42 145 if (HAVE_REG_STARTEND) {
43 // "If REG_STARTEND is specified, pmatch must point to at least
44 // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
45 // to hold the input offsets for REG_STARTEND."
46 // -- https://man.openbsd.org/regexec#:~:text=If-,REG_STARTEND,-is%20specified
47 145 regmatch_t tmp_startend;
48
2/2
✓ Branch 4 → 5 taken 53 times.
✓ Branch 4 → 6 taken 92 times.
145 pmatch = nmatch ? pmatch : &tmp_startend;
49 145 pmatch[0].rm_so = 0;
50 145 pmatch[0].rm_eo = text_len;
51 145 return !regexec(re, text, nmatch, pmatch, flags | REGEXP_STARTEND_FLAG);
52 }
53
54 // Buffer must be null-terminated if REG_STARTEND isn't supported
55 char *cstr = xstrcut(text, text_len);
56 int ret = !regexec(re, cstr, nmatch, pmatch, flags);
57 free(cstr);
58 return ret;
59 }
60
61 // Check which word boundary tokens are supported by regcomp(3)
62 // (if any) and initialize `rwbt` with them for later use
63 11 RegexpWordBoundaryTokens regexp_get_word_boundary_tokens(void)
64 {
65 11 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
66 11 const regoff_t match_start = 20, match_end = 23;
67 11 static const RegexpWordBoundaryTokens pairs[] = {
68 {"\\<", "\\>", 2},
69 {"[[:<:]]", "[[:>:]]", 7},
70 {"\\b", "\\b", 2},
71 };
72
73 11 BUG_ON(ARRAYLEN(text) <= match_end);
74 11 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
75
76 UNROLL_LOOP(ARRAYLEN(pairs))
77
1/2
✓ Branch 17 → 5 taken 11 times.
✗ Branch 17 → 18 not taken.
11 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
78 11 const RegexpWordBoundaryTokens *p = &pairs[i];
79 11 char patt[32];
80 11 xmempcpy4(patt, p->start, p->len, STRN("(foo)"), p->end, p->len, "", 1);
81 11 regex_t re;
82
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 11 times.
11 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
83 continue;
84 }
85 11 regmatch_t m[2];
86 11 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
87 11 regfree(&re);
88
3/6
✓ Branch 11 → 12 taken 11 times.
✗ Branch 11 → 15 not taken.
✓ Branch 12 → 13 taken 11 times.
✗ Branch 12 → 15 not taken.
✓ Branch 13 → 14 taken 11 times.
✗ Branch 13 → 15 not taken.
11 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
89 11 return pairs[i];
90 }
91 }
92
93 return (RegexpWordBoundaryTokens){.len = 0};
94 }
95
96 1 size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
97 {
98 1 BUG_ON(buflen < (2 * plen) + 1);
99 size_t o = 0;
100
2/2
✓ Branch 7 → 4 taken 27 times.
✓ Branch 7 → 8 taken 1 time.
28 for (size_t i = 0; i < plen; i++) {
101 27 char ch = pat[i];
102
2/2
✓ Branch 4 → 5 taken 15 times.
✓ Branch 4 → 6 taken 12 times.
27 if (is_regex_special_char(ch)) {
103 15 buf[o++] = '\\';
104 }
105 27 buf[o++] = ch;
106 }
107 1 buf[o] = '\0';
108 1 return o;
109 }
110
111 1 char *regexp_escape(const char *pattern, size_t len)
112 {
113 1 size_t buflen = xmul(2, len) + 1;
114 1 char *buf = xmalloc(buflen);
115 1 regexp_escapeb(buf, buflen, pattern, len);
116 1 return buf;
117 }
118
119 36 const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern)
120 {
121
1/2
✓ Branch 2 → 3 taken 36 times.
✗ Branch 2 → 14 not taken.
36 if (pattern[0] == '\0') {
122 return NULL;
123 }
124
125 36 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
126
2/2
✓ Branch 4 → 5 taken 35 times.
✓ Branch 4 → 14 taken 1 time.
36 if (ir) {
127 return ir;
128 }
129
130 35 ir = xmalloc(sizeof(*ir));
131 35 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
132
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 10 taken 35 times.
35 if (unlikely(err)) {
133 regexp_error_msg(ebuf, &ir->re, pattern, err);
134 free(ir);
135 return NULL;
136 }
137
138 35 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
139 35 const char *str = str_intern(pattern);
140 35 ir->str = str;
141 35 return hashmap_insert(&interned_regexps, (char*)str, ir);
142 }
143
144 58 bool regexp_is_interned(const char *pattern)
145 {
146 58 return !!hashmap_find(&interned_regexps, pattern);
147 }
148
149 35 static void free_interned_regexp(InternedRegexp *ir)
150 {
151 35 regfree(&ir->re);
152 35 free(ir);
153 35 }
154
155 11 void free_interned_regexps(void)
156 {
157 11 BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
158 11 hashmap_free(&interned_regexps, FREE_FUNC(free_interned_regexp));
159 11 }
160