dte test coverage


Directory: ./
File: src/filetype.c
Date: 2025-02-14 16:55:22
Exec Total Coverage
Lines: 163 170 95.9%
Functions: 18 18 100.0%
Branches: 99 114 86.8%

Line Branch Exec Source
1 #include <stdint.h>
2 #include <stdlib.h>
3 #include "filetype.h"
4 #include "command/serialize.h"
5 #include "regexp.h"
6 #include "util/array.h"
7 #include "util/ascii.h"
8 #include "util/bsearch.h"
9 #include "util/debug.h"
10 #include "util/hashset.h"
11 #include "util/log.h"
12 #include "util/path.h"
13 #include "util/str-util.h"
14 #include "util/strtonum.h"
15 #include "util/xmalloc.h"
16 #include "util/xmemmem.h"
17
18 1972 static int ft_compare(const void *key, const void *elem)
19 {
20 1972 const StringView *sv = key;
21 1972 const char *ext = elem; // Cast to first member of struct
22 1972 int res = memcmp(sv->data, ext, sv->length);
23
4/4
✓ Branch 0 (2→3) taken 152 times.
✓ Branch 1 (2→5) taken 1820 times.
✓ Branch 2 (3→4) taken 13 times.
✓ Branch 3 (3→5) taken 139 times.
1972 if (unlikely(res == 0 && ext[sv->length] != '\0')) {
24 13 res = -1;
25 }
26 1972 return res;
27 }
28
29 // Built-in filetypes
30 // NOLINTBEGIN(bugprone-suspicious-include)
31 #include "filetype/names.c"
32 #include "filetype/basenames.c"
33 #include "filetype/directories.c"
34 #include "filetype/extensions.c"
35 #include "filetype/interpreters.c"
36 #include "filetype/ignored-exts.c"
37 #include "filetype/signatures.c"
38 // NOLINTEND(bugprone-suspicious-include)
39
40 18 UNITTEST {
41 18 static_assert(NR_BUILTIN_FILETYPES < 256);
42 18 CHECK_BSEARCH_ARRAY(basenames, name, strcmp);
43 18 CHECK_BSEARCH_ARRAY(extensions, ext, strcmp);
44 18 CHECK_BSEARCH_ARRAY(interpreters, key, strcmp);
45 18 CHECK_BSEARCH_ARRAY(emacs_modes, name, strcmp);
46 18 CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp);
47 18 CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp);
48
49
2/2
✓ Branch 0 (12→9) taken 2178 times.
✓ Branch 1 (12→13) taken 18 times.
2196 for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) {
50 2178 const char *name = builtin_filetype_names[i];
51
1/2
✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→11) taken 2178 times.
2178 if (unlikely(!is_valid_filetype_name(name))) {
52 BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name);
53 }
54 }
55 18 }
56
57 typedef struct {
58 unsigned int str_len;
59 char str[] COUNTED_BY(str_len);
60 } FlexArrayStr;
61
62 // Filetypes dynamically added via the `ft` command.
63 // Not grouped by name to make it possible to order them freely.
64 typedef struct {
65 union {
66 FlexArrayStr *str;
67 const InternedRegexp *regexp;
68 } u;
69 uint8_t type; // FileDetectionType
70 char name[];
71 } UserFileTypeEntry;
72
73 121 static bool ft_uses_regex(FileDetectionType type)
74 {
75 121 return type == FT_CONTENT || type == FT_FILENAME;
76 }
77
78 18 bool add_filetype (
79 PointerArray *filetypes,
80 const char *name,
81 const char *str,
82 FileDetectionType type,
83 ErrorBuffer *ebuf
84 ) {
85 18 BUG_ON(!is_valid_filetype_name(name));
86 18 const InternedRegexp *ir = NULL;
87
2/2
✓ Branch 0 (4→5) taken 14 times.
✓ Branch 1 (4→7) taken 4 times.
18 if (ft_uses_regex(type)) {
88 14 ir = regexp_intern(ebuf, str);
89
1/2
✓ Branch 0 (6→7) taken 14 times.
✗ Branch 1 (6→14) not taken.
14 if (unlikely(!ir)) {
90 return false;
91 }
92 }
93
94 18 size_t name_len = strlen(name);
95 18 UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1);
96 18 ft->type = type;
97
98
2/2
✓ Branch 0 (8→9) taken 14 times.
✓ Branch 1 (8→10) taken 4 times.
18 if (ir) {
99 14 ft->u.regexp = ir;
100 } else {
101 4 size_t str_len = strlen(str);
102 4 FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1);
103 4 s->str_len = str_len;
104 4 ft->u.str = s;
105 4 memcpy(s->str, str, str_len + 1);
106 }
107
108 18 memcpy(ft->name, name, name_len + 1);
109 18 ptr_array_append(filetypes, ft);
110 18 return true;
111 }
112
113 294 static StringView path_extension(StringView filename)
114 {
115 294 StringView ext = filename;
116 294 ssize_t pos = strview_memrchr_idx(&ext, '.');
117
2/2
✓ Branch 0 (2→3) taken 116 times.
✓ Branch 1 (2→4) taken 178 times.
294 strview_remove_prefix(&ext, pos > 0 ? pos + 1 : ext.length);
118 294 return ext;
119 }
120
121 279 static StringView get_filename_extension(StringView filename)
122 {
123 279 StringView ext = path_extension(filename);
124
2/2
✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 264 times.
279 if (is_ignored_extension(ext)) {
125 15 filename.length -= ext.length + 1;
126 15 ext = path_extension(filename);
127 }
128 279 strview_remove_matching_suffix(&ext, "~");
129 279 return ext;
130 }
131
132 // Parse hashbang and return interpreter name, without version number.
133 // For example, if line is "#!/usr/bin/env python2", "python" is returned.
134 279 static StringView get_interpreter(StringView line)
135 {
136 279 StringView sv = STRING_VIEW_INIT;
137
2/2
✓ Branch 0 (3→4) taken 208 times.
✓ Branch 1 (3→5) taken 71 times.
279 if (!strview_remove_matching_prefix(&line, "#!")) {
138 208 return sv;
139 }
140
141 71 strview_trim_left(&line);
142
3/4
✓ Branch 0 (6→7) taken 71 times.
✗ Branch 1 (6→8) not taken.
✓ Branch 2 (7→8) taken 1 times.
✓ Branch 3 (7→9) taken 70 times.
71 if (line.length < 2 || line.data[0] != '/') {
143 1 return sv;
144 }
145
146 70 size_t pos = 0;
147 70 sv = get_delim(line.data, &pos, line.length, ' ');
148
4/4
✓ Branch 0 (10→11) taken 8 times.
✓ Branch 1 (10→17) taken 62 times.
✓ Branch 2 (12→14) taken 6 times.
✓ Branch 3 (12→17) taken 2 times.
70 if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) {
149
4/4
✓ Branch 0 (14→15) taken 6 times.
✓ Branch 1 (14→16) taken 1 times.
✓ Branch 2 (15→13) taken 1 times.
✓ Branch 3 (15→16) taken 5 times.
7 while (pos + 1 < line.length && line.data[pos] == ' ') {
150 1 pos++;
151 }
152 6 sv = get_delim(line.data, &pos, line.length, ' ');
153 }
154
155 70 ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/');
156
2/2
✓ Branch 0 (17→18) taken 65 times.
✓ Branch 1 (17→20) taken 5 times.
70 if (last_slash_idx >= 0) {
157 65 strview_remove_prefix(&sv, last_slash_idx + 1);
158 }
159
160
4/4
✓ Branch 0 (21→22) taken 81 times.
✓ Branch 1 (21→23) taken 5 times.
✓ Branch 2 (22→19) taken 16 times.
✓ Branch 3 (22→23) taken 65 times.
86 while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) {
161 16 sv.length--;
162 }
163
164 70 return sv;
165 }
166
167 24 static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv)
168 {
169 24 const FlexArrayStr *s = ft->u.str;
170
4/4
✓ Branch 0 (2→3) taken 15 times.
✓ Branch 1 (2→6) taken 9 times.
✓ Branch 2 (4→5) taken 12 times.
✓ Branch 3 (4→6) taken 3 times.
24 return sv.length > 0 && strview_equal_strn(&sv, s->str, s->str_len);
171 }
172
173 58 static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv)
174 {
175 58 const regex_t *re = &ft->u.regexp->re;
176 58 regmatch_t m;
177
4/4
✓ Branch 0 (2→3) taken 52 times.
✓ Branch 1 (2→6) taken 6 times.
✓ Branch 2 (4→5) taken 50 times.
✓ Branch 3 (4→6) taken 2 times.
58 return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0);
178 }
179
180 82 static bool ft_match(const UserFileTypeEntry *ft, const StringView sv)
181 {
182 82 FileDetectionType t = ft->type;
183
2/2
✓ Branch 0 (2→3) taken 58 times.
✓ Branch 1 (2→4) taken 24 times.
82 return ft_uses_regex(t) ? ft_regex_match(ft, sv) : ft_str_match(ft, sv);
184 }
185
186 typedef FileTypeEnum (*FileTypeLookupFunc)(const StringView sv);
187
188 279 const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line)
189 {
190
2/2
✓ Branch 0 (2→3) taken 153 times.
✓ Branch 1 (2→4) taken 126 times.
279 const char *b = filename ? path_basename(filename) : NULL;
191 279 const StringView base = strview_from_cstring(b);
192 279 const StringView ext = get_filename_extension(base);
193 279 const StringView path = strview_from_cstring(filename);
194 279 const StringView interpreter = get_interpreter(line);
195 279 BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0));
196 279 BUG_ON(line.length == 0 && interpreter.length != 0);
197
198 // The order of elements in this array determines the order of
199 // precedence for the lookup() functions (but note that changing
200 // the initializer below makes no difference to the array order)
201 279 static const FileTypeLookupFunc funcs[] = {
202 [FT_INTERPRETER] = filetype_from_interpreter,
203 [FT_BASENAME] = filetype_from_basename,
204 [FT_CONTENT] = filetype_from_signature,
205 [FT_EXTENSION] = filetype_from_extension,
206 [FT_FILENAME] = filetype_from_dir_prefix,
207 };
208
209 279 const StringView params[] = {
210 [FT_INTERPRETER] = interpreter,
211 [FT_BASENAME] = base,
212 [FT_CONTENT] = line,
213 [FT_EXTENSION] = ext,
214 [FT_FILENAME] = path,
215 };
216
217 // Search user `ft` entries
218
2/2
✓ Branch 0 (18→14) taken 82 times.
✓ Branch 1 (18→25) taken 274 times.
356 for (size_t i = 0, n = filetypes->count; i < n; i++) {
219 82 const UserFileTypeEntry *ft = filetypes->ptrs[i];
220
2/2
✓ Branch 0 (15→16) taken 5 times.
✓ Branch 1 (15→17) taken 77 times.
82 if (ft_match(ft, params[ft->type])) {
221 5 return ft->name;
222 }
223 }
224
225 // Search built-in lookup tables
226 static_assert(ARRAYLEN(funcs) == ARRAYLEN(params));
227
2/2
✓ Branch 0 (25→19) taken 916 times.
✓ Branch 1 (25→26) taken 94 times.
1010 for (FileDetectionType i = 0; i < ARRAYLEN(funcs); i++) {
228 916 BUG_ON(!funcs[i]);
229 916 FileTypeEnum ft = funcs[i](params[i]);
230
2/2
✓ Branch 0 (22→23) taken 180 times.
✓ Branch 1 (22→24) taken 736 times.
916 if (ft != NONE) {
231 180 return builtin_filetype_names[ft];
232 }
233 }
234
235 // Use "ini" filetype if first line looks like an ini [section]
236 94 strview_trim_right(&line);
237
2/2
✓ Branch 0 (27→28) taken 54 times.
✓ Branch 1 (27→32) taken 40 times.
94 if (line.length >= 4) {
238 54 const char *s = line.data;
239 54 const size_t n = line.length;
240
6/6
✓ Branch 0 (28→29) taken 5 times.
✓ Branch 1 (28→32) taken 49 times.
✓ Branch 2 (29→30) taken 3 times.
✓ Branch 3 (29→32) taken 2 times.
✓ Branch 4 (30→31) taken 2 times.
✓ Branch 5 (30→32) taken 1 times.
54 if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) {
241
1/2
✗ Branch 0 (31→32) not taken.
✓ Branch 1 (31→48) taken 2 times.
2 if (!strview_contains_char_type(&line, ASCII_CNTRL)) {
242 return builtin_filetype_names[INI];
243 }
244 }
245 }
246
247
2/2
✓ Branch 0 (33→34) taken 2 times.
✓ Branch 1 (33→48) taken 90 times.
92 if (strview_equal_cstring(&ext, "conf")) {
248
2/2
✓ Branch 0 (35→36) taken 1 times.
✓ Branch 1 (35→47) taken 1 times.
2 if (strview_has_prefix(&path, "/etc/systemd/")) {
249 2 return builtin_filetype_names[INI];
250 }
251 1 BUG_ON(!filename);
252 1 const StringView dir = path_slice_dirname(filename);
253
1/2
✗ Branch 0 (39→40) not taken.
✓ Branch 1 (39→47) taken 1 times.
1 if (
254 1 strview_has_prefix(&path, "/etc/")
255 || strview_has_prefix(&path, "/usr/share/")
256 || strview_has_prefix(&path, "/usr/local/share/")
257 || strview_has_suffix(&dir, "/tmpfiles.d")
258 ) {
259 return builtin_filetype_names[CONFIG];
260 }
261 }
262
263 return NULL;
264 }
265
266 30 bool is_ft(const PointerArray *filetypes, const char *name)
267 {
268
2/2
✓ Branch 0 (3→4) taken 14 times.
✓ Branch 1 (3→8) taken 16 times.
30 if (BSEARCH(name, builtin_filetype_names, vstrcmp)) {
269 return true;
270 }
271
272
2/2
✓ Branch 0 (7→5) taken 26 times.
✓ Branch 1 (7→8) taken 8 times.
34 for (size_t i = 0, n = filetypes->count; i < n; i++) {
273 26 const UserFileTypeEntry *ft = filetypes->ptrs[i];
274
2/2
✓ Branch 0 (5→6) taken 20 times.
✓ Branch 1 (5→8) taken 6 times.
26 if (streq(ft->name, name)) {
275 return true;
276 }
277 }
278
279 return false;
280 }
281
282 2 void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix)
283 {
284 // Insert all filetype names beginning with `prefix` into a HashSet
285 // (to avoid duplicates)
286 2 HashSet set;
287 2 size_t prefix_len = strlen(prefix);
288 2 size_t nr_builtin_ft = ARRAYLEN(builtin_filetype_names);
289
1/2
✓ Branch 0 (2→3) taken 2 times.
✗ Branch 1 (2→4) not taken.
2 hashset_init(&set, 20 + (prefix[0] == '\0' ? nr_builtin_ft : 0), false);
290
291
2/2
✓ Branch 0 (9→6) taken 242 times.
✓ Branch 1 (9→10) taken 2 times.
244 for (size_t i = 0; i < nr_builtin_ft; i++) {
292 242 const char *name = builtin_filetype_names[i];
293
2/2
✓ Branch 0 (6→7) taken 2 times.
✓ Branch 1 (6→8) taken 240 times.
242 if (str_has_strn_prefix(name, prefix, prefix_len)) {
294 2 hashset_insert(&set, name, strlen(name));
295 }
296 }
297
298
1/2
✗ Branch 0 (14→11) not taken.
✓ Branch 1 (14→15) taken 2 times.
2 for (size_t i = 0, n = filetypes->count; i < n; i++) {
299 const UserFileTypeEntry *ft = filetypes->ptrs[i];
300 const char *name = ft->name;
301 if (str_has_strn_prefix(name, prefix, prefix_len)) {
302 hashset_insert(&set, name, strlen(name));
303 }
304 }
305
306 // Append the collected strings to the PointerArray
307
2/2
✓ Branch 0 (19→16) taken 2 times.
✓ Branch 1 (19→20) taken 2 times.
4 for (HashSetIter iter = hashset_iter(&set); hashset_next(&iter); ) {
308 2 ptr_array_append(a, xmemdup(iter.entry->str, iter.entry->str_len + 1));
309 }
310
311 2 hashset_free(&set);
312 2 }
313
314 3 static const char *ft_get_str(const UserFileTypeEntry *ft)
315 {
316
2/2
✓ Branch 0 (2→3) taken 2 times.
✓ Branch 1 (2→4) taken 1 times.
3 return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str;
317 }
318
319 1 String dump_filetypes(const PointerArray *filetypes)
320 {
321 1 static const char flags[][4] = {
322 [FT_EXTENSION] = "",
323 [FT_FILENAME] = "-f ",
324 [FT_CONTENT] = "-c ",
325 [FT_INTERPRETER] = "-i ",
326 [FT_BASENAME] = "-b ",
327 };
328
329 1 String s = string_new(4096);
330
2/2
✓ Branch 0 (15→4) taken 3 times.
✓ Branch 1 (15→16) taken 1 times.
4 for (size_t i = 0, n = filetypes->count; i < n; i++) {
331 3 const UserFileTypeEntry *ft = filetypes->ptrs[i];
332 3 BUG_ON(ft->type >= ARRAYLEN(flags));
333 3 BUG_ON(ft->name[0] == '-');
334 3 string_append_literal(&s, "ft ");
335 3 string_append_cstring(&s, flags[ft->type]);
336 3 string_append_escaped_arg(&s, ft->name, true);
337 3 string_append_byte(&s, ' ');
338 3 string_append_escaped_arg(&s, ft_get_str(ft), true);
339 3 string_append_byte(&s, '\n');
340 }
341 1 return s;
342 }
343
344 18 static void free_filetype_entry(UserFileTypeEntry *ft)
345 {
346
2/2
✓ Branch 0 (2→3) taken 4 times.
✓ Branch 1 (2→4) taken 14 times.
18 if (!ft_uses_regex(ft->type)) {
347 4 free(ft->u.str);
348 }
349 18 free(ft);
350 18 }
351
352 9 void free_filetypes(PointerArray *filetypes)
353 {
354 9 ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry));
355 9 }
356
357 2580 bool is_valid_filetype_name_sv(const StringView *name)
358 {
359 2580 const char *data = name->data;
360 2580 const size_t len = name->length;
361
4/4
✓ Branch 0 (2→3) taken 2571 times.
✓ Branch 1 (2→8) taken 9 times.
✓ Branch 2 (3→7) taken 2568 times.
✓ Branch 3 (3→8) taken 3 times.
2580 if (unlikely(len == 0 || len > FILETYPE_NAME_MAX || data[0] == '-')) {
362 return false;
363 }
364
365 const AsciiCharType mask = ASCII_SPACE | ASCII_CNTRL;
366
2/2
✓ Branch 0 (7→4) taken 13068 times.
✓ Branch 1 (7→8) taken 2558 times.
15626 for (size_t i = 0; i < len; i++) {
367 13068 unsigned char ch = data[i];
368
4/4
✓ Branch 0 (4→5) taken 13060 times.
✓ Branch 1 (4→8) taken 8 times.
✓ Branch 2 (5→6) taken 13058 times.
✓ Branch 3 (5→8) taken 2 times.
13068 if (unlikely(ascii_test(ch, mask) || ch == '/')) {
369 return false;
370 }
371 }
372
373 return true;
374 }
375