dte test coverage


Directory: ./
File: src/filetype.c
Date: 2025-07-19 20:13:10
Exec Total Coverage
Lines: 162 174 93.1%
Functions: 18 19 94.7%
Branches: 99 116 85.3%

Line Branch Exec Source
1 #include <stdint.h>
2 #include <stdlib.h>
3 #include "filetype.h"
4 #include "command/serialize.h"
5 #include "regexp.h"
6 #include "util/ascii.h"
7 #include "util/bsearch.h"
8 #include "util/debug.h"
9 #include "util/hashset.h"
10 #include "util/log.h"
11 #include "util/path.h"
12 #include "util/str-util.h"
13 #include "util/strtonum.h"
14 #include "util/xmalloc.h"
15 #include "util/xmemmem.h"
16
17 1997 static int ft_compare(const void *key, const void *elem)
18 {
19 1997 const StringView *sv = key;
20 1997 const char *ext = elem; // Cast to first member of struct
21 1997 int res = memcmp(sv->data, ext, sv->length);
22
4/4
✓ Branch 0 (2→3) taken 154 times.
✓ Branch 1 (2→5) taken 1843 times.
✓ Branch 2 (3→4) taken 12 times.
✓ Branch 3 (3→5) taken 142 times.
1997 if (unlikely(res == 0 && ext[sv->length] != '\0')) {
23 12 res = -1;
24 }
25 1997 return res;
26 }
27
28 // Built-in filetypes
29 // NOLINTBEGIN(bugprone-suspicious-include)
30 #include "filetype/names.c"
31 #include "filetype/basenames.c"
32 #include "filetype/directories.c"
33 #include "filetype/extensions.c"
34 #include "filetype/interpreters.c"
35 #include "filetype/ignored-exts.c"
36 #include "filetype/signatures.c"
37 // NOLINTEND(bugprone-suspicious-include)
38
39 24 UNITTEST {
40 24 static_assert(NR_BUILTIN_FILETYPES < 256);
41 24 CHECK_BSEARCH_ARRAY(basenames, name);
42 24 CHECK_BSEARCH_ARRAY(extensions, ext);
43 24 CHECK_BSEARCH_ARRAY(interpreters, key);
44 24 CHECK_BSEARCH_ARRAY(emacs_modes, name);
45 24 CHECK_BSEARCH_STR_ARRAY(ignored_extensions);
46 24 CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names);
47
48
2/2
✓ Branch 0 (12→9) taken 3000 times.
✓ Branch 1 (12→13) taken 24 times.
3024 for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) {
49 3000 const char *name = builtin_filetype_names[i];
50
1/2
✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→11) taken 3000 times.
3000 if (unlikely(!is_valid_filetype_name(name))) {
51 BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name);
52 }
53 }
54 24 }
55
56 typedef struct {
57 unsigned int str_len;
58 char str[] COUNTED_BY(str_len);
59 } FlexArrayStr;
60
61 // Filetypes dynamically added via the `ft` command.
62 // Not grouped by name to make it possible to order them freely.
63 typedef struct {
64 union {
65 FlexArrayStr *str;
66 const InternedRegexp *regexp;
67 } u;
68 uint8_t type; // FileDetectionType
69 char name[];
70 } UserFileTypeEntry;
71
72 133 static bool ft_uses_regex(FileDetectionType type)
73 {
74 133 return type == FT_CONTENT || type == FT_FILENAME;
75 }
76
77 24 bool add_filetype (
78 PointerArray *filetypes,
79 const char *name,
80 const char *str,
81 FileDetectionType type,
82 ErrorBuffer *ebuf
83 ) {
84 24 BUG_ON(!is_valid_filetype_name(name));
85 24 const InternedRegexp *ir = NULL;
86
2/2
✓ Branch 0 (4→5) taken 20 times.
✓ Branch 1 (4→7) taken 4 times.
24 if (ft_uses_regex(type)) {
87 20 ir = regexp_intern(ebuf, str);
88
1/2
✓ Branch 0 (6→7) taken 20 times.
✗ Branch 1 (6→14) not taken.
20 if (unlikely(!ir)) {
89 return false;
90 }
91 }
92
93 24 size_t name_len = strlen(name);
94 24 UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1);
95 24 ft->type = type;
96
97
2/2
✓ Branch 0 (8→9) taken 20 times.
✓ Branch 1 (8→10) taken 4 times.
24 if (ir) {
98 20 ft->u.regexp = ir;
99 } else {
100 4 size_t str_len = strlen(str);
101 4 FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1);
102 4 s->str_len = str_len;
103 4 ft->u.str = s;
104 4 memcpy(s->str, str, str_len + 1);
105 }
106
107 24 memcpy(ft->name, name, name_len + 1);
108 24 ptr_array_append(filetypes, ft);
109 24 return true;
110 }
111
112 297 static StringView path_extension(StringView filename)
113 {
114 297 StringView ext = filename;
115 297 ssize_t pos = strview_memrchr_idx(&ext, '.');
116
2/2
✓ Branch 0 (2→3) taken 116 times.
✓ Branch 1 (2→4) taken 181 times.
297 strview_remove_prefix(&ext, pos > 0 ? pos + 1 : ext.length);
117 297 return ext;
118 }
119
120 282 static StringView get_filename_extension(StringView filename)
121 {
122 282 StringView ext = path_extension(filename);
123
2/2
✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 267 times.
282 if (is_ignored_extension(ext)) {
124 15 filename.length -= ext.length + 1;
125 15 ext = path_extension(filename);
126 }
127 282 strview_remove_matching_suffix(&ext, "~");
128 282 return ext;
129 }
130
131 // Parse hashbang and return interpreter name, without version number.
132 // For example, if line is "#!/usr/bin/env python2", "python" is returned.
133 282 static StringView get_interpreter(StringView line)
134 {
135 282 StringView sv = STRING_VIEW_INIT;
136
2/2
✓ Branch 0 (3→4) taken 208 times.
✓ Branch 1 (3→5) taken 74 times.
282 if (!strview_remove_matching_prefix(&line, "#!")) {
137 208 return sv;
138 }
139
140 74 strview_trim_left(&line);
141
3/4
✓ Branch 0 (6→7) taken 74 times.
✗ Branch 1 (6→8) not taken.
✓ Branch 2 (7→8) taken 1 times.
✓ Branch 3 (7→9) taken 73 times.
74 if (line.length < 2 || line.data[0] != '/') {
142 1 return sv;
143 }
144
145 73 size_t pos = 0;
146 73 sv = get_delim(line.data, &pos, line.length, ' ');
147
4/4
✓ Branch 0 (10→11) taken 8 times.
✓ Branch 1 (10→17) taken 65 times.
✓ Branch 2 (12→14) taken 6 times.
✓ Branch 3 (12→17) taken 2 times.
73 if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) {
148
4/4
✓ Branch 0 (14→15) taken 6 times.
✓ Branch 1 (14→16) taken 1 times.
✓ Branch 2 (15→13) taken 1 times.
✓ Branch 3 (15→16) taken 5 times.
7 while (pos + 1 < line.length && line.data[pos] == ' ') {
149 1 pos++;
150 }
151 6 sv = get_delim(line.data, &pos, line.length, ' ');
152 }
153
154 73 ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/');
155
2/2
✓ Branch 0 (17→18) taken 68 times.
✓ Branch 1 (17→20) taken 5 times.
73 if (last_slash_idx >= 0) {
156 68 strview_remove_prefix(&sv, last_slash_idx + 1);
157 }
158
159
4/4
✓ Branch 0 (21→22) taken 84 times.
✓ Branch 1 (21→23) taken 5 times.
✓ Branch 2 (22→19) taken 16 times.
✓ Branch 3 (22→23) taken 68 times.
89 while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) {
160 16 sv.length--;
161 }
162
163 73 return sv;
164 }
165
166 24 static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv)
167 {
168 24 const FlexArrayStr *s = ft->u.str;
169
4/4
✓ Branch 0 (2→3) taken 15 times.
✓ Branch 1 (2→6) taken 9 times.
✓ Branch 2 (4→5) taken 12 times.
✓ Branch 3 (4→6) taken 3 times.
24 return sv.length > 0 && strview_equal_strn(&sv, s->str, s->str_len);
170 }
171
172 58 static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv)
173 {
174 58 const regex_t *re = &ft->u.regexp->re;
175
4/4
✓ Branch 0 (2→3) taken 52 times.
✓ Branch 1 (2→6) taken 6 times.
✓ Branch 2 (4→5) taken 50 times.
✓ Branch 3 (4→6) taken 2 times.
58 return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, NULL, 0);
176 }
177
178 82 static bool ft_match(const UserFileTypeEntry *ft, const StringView sv)
179 {
180 82 FileDetectionType t = ft->type;
181
2/2
✓ Branch 0 (2→3) taken 58 times.
✓ Branch 1 (2→4) taken 24 times.
82 return ft_uses_regex(t) ? ft_regex_match(ft, sv) : ft_str_match(ft, sv);
182 }
183
184 typedef FileTypeEnum (*FileTypeLookupFunc)(const StringView sv);
185
186 282 const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line)
187 {
188
2/2
✓ Branch 0 (2→3) taken 153 times.
✓ Branch 1 (2→4) taken 129 times.
282 const char *b = filename ? path_basename(filename) : NULL;
189 282 const StringView base = strview_from_cstring(b);
190 282 const StringView ext = get_filename_extension(base);
191 282 const StringView path = strview_from_cstring(filename);
192 282 const StringView interpreter = get_interpreter(line);
193 282 BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0));
194 282 BUG_ON(line.length == 0 && interpreter.length != 0);
195
196 // The order of elements in this array determines the order of
197 // precedence for the lookup() functions (but note that changing
198 // the initializer below makes no difference to the array order)
199 282 static const FileTypeLookupFunc funcs[] = {
200 [FT_INTERPRETER] = filetype_from_interpreter,
201 [FT_BASENAME] = filetype_from_basename,
202 [FT_CONTENT] = filetype_from_signature,
203 [FT_EXTENSION] = filetype_from_extension,
204 [FT_FILENAME] = filetype_from_dir_prefix,
205 };
206
207 282 const StringView params[] = {
208 [FT_INTERPRETER] = interpreter,
209 [FT_BASENAME] = base,
210 [FT_CONTENT] = line,
211 [FT_EXTENSION] = ext,
212 [FT_FILENAME] = path,
213 };
214
215 // Search user `ft` entries
216
2/2
✓ Branch 0 (18→14) taken 82 times.
✓ Branch 1 (18→25) taken 277 times.
359 for (size_t i = 0, n = filetypes->count; i < n; i++) {
217 82 const UserFileTypeEntry *ft = filetypes->ptrs[i];
218
2/2
✓ Branch 0 (15→16) taken 5 times.
✓ Branch 1 (15→17) taken 77 times.
82 if (ft_match(ft, params[ft->type])) {
219 5 return ft->name;
220 }
221 }
222
223 // Search built-in lookup tables
224 static_assert(ARRAYLEN(funcs) == ARRAYLEN(params));
225
2/2
✓ Branch 0 (25→19) taken 919 times.
✓ Branch 1 (25→26) taken 94 times.
1013 for (FileDetectionType i = 0; i < ARRAYLEN(funcs); i++) {
226 919 BUG_ON(!funcs[i]);
227 919 FileTypeEnum ft = funcs[i](params[i]);
228
2/2
✓ Branch 0 (22→23) taken 183 times.
✓ Branch 1 (22→24) taken 736 times.
919 if (ft != NONE) {
229 183 return builtin_filetype_names[ft];
230 }
231 }
232
233 // Use "ini" filetype if first line looks like an ini [section]
234 94 strview_trim_right(&line);
235
2/2
✓ Branch 0 (27→28) taken 54 times.
✓ Branch 1 (27→32) taken 40 times.
94 if (line.length >= 4) {
236 54 const char *s = line.data;
237 54 const size_t n = line.length;
238
6/6
✓ Branch 0 (28→29) taken 5 times.
✓ Branch 1 (28→32) taken 49 times.
✓ Branch 2 (29→30) taken 3 times.
✓ Branch 3 (29→32) taken 2 times.
✓ Branch 4 (30→31) taken 2 times.
✓ Branch 5 (30→32) taken 1 times.
54 if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) {
239
1/2
✗ Branch 0 (31→32) not taken.
✓ Branch 1 (31→48) taken 2 times.
2 if (!strview_contains_char_type(&line, ASCII_CNTRL)) {
240 return builtin_filetype_names[INI];
241 }
242 }
243 }
244
245
2/2
✓ Branch 0 (33→34) taken 2 times.
✓ Branch 1 (33→48) taken 90 times.
92 if (strview_equal_cstring(&ext, "conf")) {
246
2/2
✓ Branch 0 (35→36) taken 1 times.
✓ Branch 1 (35→47) taken 1 times.
2 if (strview_has_prefix(&path, "/etc/systemd/")) {
247 2 return builtin_filetype_names[INI];
248 }
249 1 BUG_ON(!filename);
250 1 const StringView dir = path_slice_dirname(filename);
251
1/2
✗ Branch 0 (39→40) not taken.
✓ Branch 1 (39→47) taken 1 times.
1 if (
252 1 strview_has_prefix(&path, "/etc/")
253 || strview_has_prefix(&path, "/usr/share/")
254 || strview_has_prefix(&path, "/usr/local/share/")
255 || strview_has_suffix(&dir, "/tmpfiles.d")
256 ) {
257 return builtin_filetype_names[CONFIG];
258 }
259 }
260
261 return NULL;
262 }
263
264 31 bool is_ft(const PointerArray *filetypes, const char *name)
265 {
266
2/2
✓ Branch 0 (3→4) taken 14 times.
✓ Branch 1 (3→8) taken 17 times.
31 if (BSEARCH(name, builtin_filetype_names, vstrcmp)) {
267 return true;
268 }
269
270
2/2
✓ Branch 0 (7→5) taken 26 times.
✓ Branch 1 (7→8) taken 8 times.
34 for (size_t i = 0, n = filetypes->count; i < n; i++) {
271 26 const UserFileTypeEntry *ft = filetypes->ptrs[i];
272
2/2
✓ Branch 0 (5→6) taken 20 times.
✓ Branch 1 (5→8) taken 6 times.
26 if (streq(ft->name, name)) {
273 return true;
274 }
275 }
276
277 return false;
278 }
279
280 2 void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix)
281 {
282 // Insert all filetype names beginning with `prefix` into a HashSet
283 // (to avoid duplicates)
284 2 HashSet set;
285 2 size_t prefix_len = strlen(prefix);
286 2 size_t nr_builtin_ft = ARRAYLEN(builtin_filetype_names);
287
1/2
✓ Branch 0 (2→3) taken 2 times.
✗ Branch 1 (2→4) not taken.
2 hashset_init(&set, 20 + (prefix[0] == '\0' ? nr_builtin_ft : 0), false);
288
289
2/2
✓ Branch 0 (9→6) taken 250 times.
✓ Branch 1 (9→10) taken 2 times.
252 for (size_t i = 0; i < nr_builtin_ft; i++) {
290 250 const char *name = builtin_filetype_names[i];
291
2/2
✓ Branch 0 (6→7) taken 2 times.
✓ Branch 1 (6→8) taken 248 times.
250 if (str_has_strn_prefix(name, prefix, prefix_len)) {
292 2 hashset_insert(&set, name, strlen(name));
293 }
294 }
295
296
1/2
✗ Branch 0 (14→11) not taken.
✓ Branch 1 (14→15) taken 2 times.
2 for (size_t i = 0, n = filetypes->count; i < n; i++) {
297 const UserFileTypeEntry *ft = filetypes->ptrs[i];
298 const char *name = ft->name;
299 if (str_has_strn_prefix(name, prefix, prefix_len)) {
300 hashset_insert(&set, name, strlen(name));
301 }
302 }
303
304 // Append the collected strings to the PointerArray
305
2/2
✓ Branch 0 (19→16) taken 2 times.
✓ Branch 1 (19→20) taken 2 times.
4 for (HashSetIter iter = hashset_iter(&set); hashset_next(&iter); ) {
306 2 ptr_array_append(a, xmemdup(iter.entry->str, iter.entry->str_len + 1));
307 }
308
309 2 hashset_free(&set);
310 2 }
311
312 3 static const char *ft_get_str(const UserFileTypeEntry *ft)
313 {
314
2/2
✓ Branch 0 (2→3) taken 2 times.
✓ Branch 1 (2→4) taken 1 times.
3 return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str;
315 }
316
317 1 String dump_filetypes(const PointerArray *filetypes)
318 {
319 1 static const char flags[][4] = {
320 [FT_EXTENSION] = "",
321 [FT_FILENAME] = "-f ",
322 [FT_CONTENT] = "-c ",
323 [FT_INTERPRETER] = "-i ",
324 [FT_BASENAME] = "-b ",
325 };
326
327 1 String s = string_new(4096);
328
2/2
✓ Branch 0 (15→4) taken 3 times.
✓ Branch 1 (15→16) taken 1 times.
4 for (size_t i = 0, n = filetypes->count; i < n; i++) {
329 3 const UserFileTypeEntry *ft = filetypes->ptrs[i];
330 3 BUG_ON(ft->type >= ARRAYLEN(flags));
331 3 BUG_ON(ft->name[0] == '-');
332 3 string_append_literal(&s, "ft ");
333 3 string_append_cstring(&s, flags[ft->type]);
334 3 string_append_escaped_arg(&s, ft->name, true);
335 3 string_append_byte(&s, ' ');
336 3 string_append_escaped_arg(&s, ft_get_str(ft), true);
337 3 string_append_byte(&s, '\n');
338 }
339 1 return s;
340 }
341
342 24 static void free_filetype_entry(UserFileTypeEntry *ft)
343 {
344
2/2
✓ Branch 0 (2→3) taken 4 times.
✓ Branch 1 (2→4) taken 20 times.
24 if (!ft_uses_regex(ft->type)) {
345 4 free(ft->u.str);
346 }
347 24 free(ft);
348 24 }
349
350 12 void free_filetypes(PointerArray *filetypes)
351 {
352 12 ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry));
353 12 }
354
355 3534 bool is_valid_filetype_name_sv(StringView name)
356 {
357 3534 const char *data = name.data;
358 3534 const size_t len = name.length;
359
4/4
✓ Branch 0 (2→3) taken 3525 times.
✓ Branch 1 (2→8) taken 9 times.
✓ Branch 2 (3→7) taken 3522 times.
✓ Branch 3 (3→8) taken 3 times.
3534 if (unlikely(len == 0 || len > FILETYPE_NAME_MAX || data[0] == '-')) {
360 return false;
361 }
362
363 const AsciiCharType mask = ASCII_SPACE | ASCII_CNTRL;
364
2/2
✓ Branch 0 (7→4) taken 17853 times.
✓ Branch 1 (7→8) taken 3512 times.
21365 for (size_t i = 0; i < len; i++) {
365 17853 unsigned char ch = data[i];
366
4/4
✓ Branch 0 (4→5) taken 17845 times.
✓ Branch 1 (4→8) taken 8 times.
✓ Branch 2 (5→6) taken 17843 times.
✓ Branch 3 (5→8) taken 2 times.
17853 if (unlikely(ascii_test(ch, mask) || ch == '/')) {
367 return false;
368 }
369 }
370
371 return true;
372 }
373
374 const char *filetype_str_from_extension(const char *path)
375 {
376 StringView base = strview(path_basename(path));
377 StringView ext = get_filename_extension(base);
378 FileTypeEnum ft = filetype_from_extension(ext);
379 return (ft == NONE) ? NULL : builtin_filetype_names[ft];
380 }
381