| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <errno.h> | ||
| 2 | #include <stdlib.h> | ||
| 3 | #include "regexp.h" | ||
| 4 | #include "util/arith.h" | ||
| 5 | #include "util/ascii.h" | ||
| 6 | #include "util/debug.h" | ||
| 7 | #include "util/hashmap.h" | ||
| 8 | #include "util/intern.h" | ||
| 9 | #include "util/xmalloc.h" | ||
| 10 | #include "util/xstring.h" | ||
| 11 | |||
// File-scope map of compiled regexps, looked up by pattern string in
// regexp_intern() and regexp_is_interned(). Entries are inserted using
// the pointer returned by str_intern() as the key, and both the insert
// and free paths assert that HMAP_BORROWED_KEYS is still set.
// NOTE(review): presumably HMAP_BORROWED_KEYS means the map doesn't own
// (and thus never frees) its keys -- confirm against util/hashmap.h.
// NOLINTNEXTLINE(*-avoid-non-const-global-variables)
static HashMap interned_regexps = {.flags = HMAP_BORROWED_KEYS};
| 14 | |||
| 15 | ✗ | bool regexp_error_msg(ErrorBuffer *ebuf, const regex_t *re, const char *pattern, int err) | |
| 16 | { | ||
| 17 | ✗ | if (!ebuf) { | |
| 18 | return false; | ||
| 19 | } | ||
| 20 | ✗ | char msg[1024]; | |
| 21 | ✗ | regerror(err, re, msg, sizeof(msg)); | |
| 22 | ✗ | return error_msg(ebuf, "%s: %s", msg, pattern); | |
| 23 | } | ||
| 24 | |||
| 25 | 3 | const regex_t *regexp_compile_or_fatal_error(const char *pattern) | |
| 26 | { | ||
| 27 | 3 | const InternedRegexp *ir = regexp_intern(NULL, pattern); | |
| 28 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→5) taken 3 times.
|
3 | FATAL_ERROR_ON(!ir, EINVAL); |
| 29 | 3 | return &ir->re; | |
| 30 | } | ||
| 31 | |||
// Match `re` against the first `text_len` bytes of `text` and return
// true if regexec(3) reports a match.
//
// Up to `nmatch` match offsets are written to `pmatch`; `pmatch` may
// only be NULL when `nmatch` is 0. `flags` is forwarded to regexec(3).
bool regexp_exec (
    const regex_t *re,
    const char *text,
    size_t text_len,
    size_t nmatch,
    regmatch_t *pmatch,
    int flags
) {
    BUG_ON(nmatch && !pmatch);

    // ASan's __interceptor_regexec() doesn't support REG_STARTEND
    #if defined(REG_STARTEND) && ASAN_ENABLED == 0 && MSAN_ENABLED == 0
        // "If REG_STARTEND is specified, pmatch must point to at least
        // one regmatch_t (even if nmatch is 0 or REG_NOSUB was specified),
        // to hold the input offsets for REG_STARTEND."
        // -- https://man.openbsd.org/regex.3
        regmatch_t startend_slot;
        if (!nmatch) {
            // Caller wants no offsets; use a local slot for the input range
            pmatch = &startend_slot;
        }
        pmatch[0].rm_so = 0;
        pmatch[0].rm_eo = text_len;
        return !regexec(re, text, nmatch, pmatch, flags | REG_STARTEND);
    #endif

    // Only reached when the REG_STARTEND path above isn't compiled in.
    // Buffer must be null-terminated if REG_STARTEND isn't supported
    char *terminated = xstrcut(text, text_len);
    int matched = !regexec(re, terminated, nmatch, pmatch, flags);
    free(terminated);
    return matched;
}
| 61 | |||
| 62 | // Check which word boundary tokens are supported by regcomp(3) | ||
| 63 | // (if any) and initialize `rwbt` with them for later use | ||
| 64 | 11 | bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt) | |
| 65 | { | ||
| 66 | 11 | static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE"; | |
| 67 | 11 | const regoff_t match_start = 20, match_end = 23; | |
| 68 | 11 | static const RegexpWordBoundaryTokens pairs[] = { | |
| 69 | {"\\<", "\\>", 2}, | ||
| 70 | {"[[:<:]]", "[[:>:]]", 7}, | ||
| 71 | {"\\b", "\\b", 2}, | ||
| 72 | }; | ||
| 73 | |||
| 74 | 11 | BUG_ON(ARRAYLEN(text) <= match_end); | |
| 75 | 11 | BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5)); | |
| 76 | |||
| 77 |
1/2✓ Branch 0 (17→5) taken 11 times.
✗ Branch 1 (17→18) not taken.
|
11 | for (size_t i = 0; i < ARRAYLEN(pairs); i++) { |
| 78 | 11 | const RegexpWordBoundaryTokens *p = &pairs[i]; | |
| 79 | 11 | char patt[32]; | |
| 80 | 11 | xmempcpy3(patt, p->start, p->len, STRN("(foo)"), p->end, p->len + 1); | |
| 81 | 11 | regex_t re; | |
| 82 |
1/2✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→9) taken 11 times.
|
11 | if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) { |
| 83 | ✗ | continue; | |
| 84 | } | ||
| 85 | 11 | regmatch_t m[2]; | |
| 86 | 11 | bool match = !regexec(&re, text, ARRAYLEN(m), m, 0); | |
| 87 | 11 | regfree(&re); | |
| 88 |
3/6✓ Branch 0 (11→12) taken 11 times.
✗ Branch 1 (11→15) not taken.
✓ Branch 2 (12→13) taken 11 times.
✗ Branch 3 (12→15) not taken.
✓ Branch 4 (13→14) taken 11 times.
✗ Branch 5 (13→15) not taken.
|
11 | if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) { |
| 89 | 11 | *rwbt = pairs[i]; | |
| 90 | 11 | return true; | |
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | return false; | ||
| 95 | } | ||
| 96 | |||
// Copy the `plen` bytes of `pat` into `buf`, inserting a backslash before
// every byte for which is_regex_special_char() is true, then append a
// null terminator.
//
// `buflen` must be at least (2 * plen) + 1, which covers the case where
// every byte needs escaping. Returns the number of bytes written, not
// counting the terminator.
size_t regexp_escapeb(char *buf, size_t buflen, const char *pat, size_t plen)
{
    BUG_ON(buflen < (2 * plen) + 1);

    char *out = buf;
    for (size_t i = 0; i < plen; i++) {
        const char c = pat[i];
        if (is_regex_special_char(c)) {
            *out++ = '\\';
        }
        *out++ = c;
    }

    *out = '\0';
    return (size_t)(out - buf);
}
| 111 | |||
// Return a newly xmalloc()'d, null-terminated copy of the first `len`
// bytes of `pattern`, escaped by regexp_escapeb(). The caller receives
// the buffer returned by xmalloc().
char *regexp_escape(const char *pattern, size_t len)
{
    // Worst case: every byte gains a backslash, plus the terminator
    const size_t capacity = xmul(2, len) + 1;
    char *escaped = xmalloc(capacity);
    regexp_escapeb(escaped, capacity, pattern, len);
    return escaped;
}
| 119 | |||
| 120 | 36 | const InternedRegexp *regexp_intern(ErrorBuffer *ebuf, const char *pattern) | |
| 121 | { | ||
| 122 |
1/2✓ Branch 0 (2→3) taken 36 times.
✗ Branch 1 (2→14) not taken.
|
36 | if (pattern[0] == '\0') { |
| 123 | return NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | 36 | InternedRegexp *ir = hashmap_get(&interned_regexps, pattern); | |
| 127 |
2/2✓ Branch 0 (4→5) taken 35 times.
✓ Branch 1 (4→14) taken 1 times.
|
36 | if (ir) { |
| 128 | return ir; | ||
| 129 | } | ||
| 130 | |||
| 131 | 35 | ir = xmalloc(sizeof(*ir)); | |
| 132 | 35 | int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB); | |
| 133 |
1/2✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→10) taken 35 times.
|
35 | if (unlikely(err)) { |
| 134 | ✗ | regexp_error_msg(ebuf, &ir->re, pattern, err); | |
| 135 | ✗ | free(ir); | |
| 136 | ✗ | return NULL; | |
| 137 | } | ||
| 138 | |||
| 139 | 35 | BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS)); | |
| 140 | 35 | const char *str = str_intern(pattern); | |
| 141 | 35 | ir->str = str; | |
| 142 | 35 | return hashmap_insert(&interned_regexps, (char*)str, ir); | |
| 143 | } | ||
| 144 | |||
| 145 | 58 | bool regexp_is_interned(const char *pattern) | |
| 146 | { | ||
| 147 | 58 | return !!hashmap_find(&interned_regexps, pattern); | |
| 148 | } | ||
| 149 | |||
| 150 | 35 | static void free_interned_regexp(InternedRegexp *ir) | |
| 151 | { | ||
| 152 | 35 | regfree(&ir->re); | |
| 153 | 35 | free(ir); | |
| 154 | 35 | } | |
| 155 | |||
// Free the interned regexp map, passing free_interned_regexp() to
// hashmap_free() as the callback for the stored entries.
void free_interned_regexps(void)
{
    // The map is expected to still be in borrowed-keys mode.
    // NOTE(review): presumably this means hashmap_free() leaves the
    // str_intern() keys alone -- confirm against util/hashmap.h.
    BUG_ON(!(interned_regexps.flags & HMAP_BORROWED_KEYS));
    hashmap_free(&interned_regexps, FREE_FUNC(free_interned_regexp));
}
| 161 |