dte test coverage


Directory: ./
File: src/filetype.c
Date: 2024-12-21 16:03:22
Exec Total Coverage
Lines: 153 160 95.6%
Functions: 18 18 100.0%
Branches: 92 106 86.8%

Line Branch Exec Source
1 #include <stdint.h>
2 #include <stdlib.h>
3 #include "filetype.h"
4 #include "command/serialize.h"
5 #include "regexp.h"
6 #include "util/array.h"
7 #include "util/ascii.h"
8 #include "util/bsearch.h"
9 #include "util/debug.h"
10 #include "util/log.h"
11 #include "util/path.h"
12 #include "util/str-util.h"
13 #include "util/xmalloc.h"
14 #include "util/xmemmem.h"
15
16 1853 static int ft_compare(const void *key, const void *elem)
17 {
18 1853 const StringView *sv = key;
19 1853 const char *ext = elem; // Cast to first member of struct
20 1853 int res = memcmp(sv->data, ext, sv->length);
21
4/4
✓ Branch 0 taken 151 times.
✓ Branch 1 taken 1702 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 139 times.
1853 if (unlikely(res == 0 && ext[sv->length] != '\0')) {
22 12 res = -1;
23 }
24 1853 return res;
25 }
26
27 // Built-in filetypes
28 // NOLINTBEGIN(bugprone-suspicious-include)
29 #include "filetype/names.c"
30 #include "filetype/basenames.c"
31 #include "filetype/directories.c"
32 #include "filetype/extensions.c"
33 #include "filetype/interpreters.c"
34 #include "filetype/ignored-exts.c"
35 #include "filetype/signatures.c"
36 // NOLINTEND(bugprone-suspicious-include)
37
38 18 UNITTEST {
39 18 static_assert(NR_BUILTIN_FILETYPES < 256);
40 18 CHECK_BSEARCH_ARRAY(basenames, name, strcmp);
41 18 CHECK_BSEARCH_ARRAY(extensions, ext, strcmp);
42 18 CHECK_BSEARCH_ARRAY(interpreters, key, strcmp);
43 18 CHECK_BSEARCH_ARRAY(emacs_modes, name, strcmp);
44 18 CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp);
45 18 CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp);
46
47
2/2
✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 18 times.
2142 for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) {
48 2124 const char *name = builtin_filetype_names[i];
49
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2124 times.
2124 if (unlikely(!is_valid_filetype_name(name))) {
50 BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name);
51 }
52 }
53 18 }
54
55 typedef struct {
56 unsigned int str_len;
57 char str[] COUNTED_BY(str_len);
58 } FlexArrayStr;
59
60 // Filetypes dynamically added via the `ft` command.
61 // Not grouped by name to make it possible to order them freely.
62 typedef struct {
63 union {
64 FlexArrayStr *str;
65 const InternedRegexp *regexp;
66 } u;
67 uint8_t type; // FileDetectionType
68 char name[];
69 } UserFileTypeEntry;
70
71 103 static bool ft_uses_regex(FileDetectionType type)
72 {
73 103 return type == FT_CONTENT || type == FT_FILENAME;
74 }
75
76 18 bool add_filetype(PointerArray *filetypes, const char *name, const char *str, FileDetectionType type)
77 {
78 18 BUG_ON(!is_valid_filetype_name(name));
79 18 const InternedRegexp *ir = NULL;
80
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
18 if (ft_uses_regex(type)) {
81 14 ir = regexp_intern(str);
82
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (unlikely(!ir)) {
83 return false;
84 }
85 }
86
87 18 size_t name_len = strlen(name);
88 18 UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1);
89 18 ft->type = type;
90
91
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
18 if (ir) {
92 14 ft->u.regexp = ir;
93 } else {
94 4 size_t str_len = strlen(str);
95 4 FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1);
96 4 s->str_len = str_len;
97 4 ft->u.str = s;
98 4 memcpy(s->str, str, str_len + 1);
99 }
100
101 18 memcpy(ft->name, name, name_len + 1);
102 18 ptr_array_append(filetypes, ft);
103 18 return true;
104 }
105
106 282 static StringView path_extension(StringView filename)
107 {
108 282 StringView ext = filename;
109 282 ssize_t pos = strview_memrchr_idx(&ext, '.');
110
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 172 times.
282 strview_remove_prefix(&ext, pos > 0 ? pos + 1 : ext.length);
111 282 return ext;
112 }
113
114 267 static StringView get_filename_extension(StringView filename)
115 {
116 267 StringView ext = path_extension(filename);
117
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 252 times.
267 if (is_ignored_extension(ext)) {
118 15 filename.length -= ext.length + 1;
119 15 ext = path_extension(filename);
120 }
121 267 strview_remove_matching_suffix(&ext, "~");
122 267 return ext;
123 }
124
125 // Parse hashbang and return interpreter name, without version number.
126 // For example, if line is "#!/usr/bin/env python2", "python" is returned.
127 267 static StringView get_interpreter(StringView line)
128 {
129 267 StringView sv = STRING_VIEW_INIT;
130
2/2
✓ Branch 0 taken 196 times.
✓ Branch 1 taken 71 times.
267 if (!strview_remove_matching_prefix(&line, "#!")) {
131 196 return sv;
132 }
133
134 71 strview_trim_left(&line);
135
3/4
✓ Branch 0 taken 71 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 70 times.
71 if (line.length < 2 || line.data[0] != '/') {
136 1 return sv;
137 }
138
139 70 size_t pos = 0;
140 70 sv = get_delim(line.data, &pos, line.length, ' ');
141
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 62 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 2 times.
70 if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) {
142
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 5 times.
7 while (pos + 1 < line.length && line.data[pos] == ' ') {
143 1 pos++;
144 }
145 6 sv = get_delim(line.data, &pos, line.length, ' ');
146 }
147
148 70 ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/');
149
2/2
✓ Branch 0 taken 65 times.
✓ Branch 1 taken 5 times.
70 if (last_slash_idx >= 0) {
150 65 strview_remove_prefix(&sv, last_slash_idx + 1);
151 }
152
153
4/4
✓ Branch 0 taken 81 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 65 times.
86 while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) {
154 16 sv.length--;
155 }
156
157 70 return sv;
158 }
159
160 18 static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv)
161 {
162 18 const FlexArrayStr *s = ft->u.str;
163
4/4
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3 times.
18 return sv.length > 0 && strview_equal_strn(&sv, s->str, s->str_len);
164 }
165
166 46 static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv)
167 {
168 46 const regex_t *re = &ft->u.regexp->re;
169 46 regmatch_t m;
170
4/4
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 38 times.
✓ Branch 3 taken 2 times.
46 return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0);
171 }
172
173 64 static bool ft_match(const UserFileTypeEntry *ft, const StringView sv)
174 {
175 64 FileDetectionType t = ft->type;
176
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 18 times.
64 return ft_uses_regex(t) ? ft_regex_match(ft, sv) : ft_str_match(ft, sv);
177 }
178
179 typedef FileTypeEnum (*FileTypeLookupFunc)(const StringView sv);
180
181 267 const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line)
182 {
183
2/2
✓ Branch 0 taken 147 times.
✓ Branch 1 taken 120 times.
267 const char *b = filename ? path_basename(filename) : NULL;
184 267 const StringView base = strview_from_cstring(b);
185 267 const StringView ext = get_filename_extension(base);
186 267 const StringView path = strview_from_cstring(filename);
187 267 const StringView interpreter = get_interpreter(line);
188 267 BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0));
189 267 BUG_ON(line.length == 0 && interpreter.length != 0);
190
191 // The order of elements in this array determines the order of
192 // precedence for the lookup() functions (but note that changing
193 // the initializer below makes no difference to the array order)
194 267 static const FileTypeLookupFunc funcs[] = {
195 [FT_INTERPRETER] = filetype_from_interpreter,
196 [FT_BASENAME] = filetype_from_basename,
197 [FT_CONTENT] = filetype_from_signature,
198 [FT_EXTENSION] = filetype_from_extension,
199 [FT_FILENAME] = filetype_from_dir_prefix,
200 };
201
202 267 const StringView params[] = {
203 [FT_INTERPRETER] = interpreter,
204 [FT_BASENAME] = base,
205 [FT_CONTENT] = line,
206 [FT_EXTENSION] = ext,
207 [FT_FILENAME] = path,
208 };
209
210 // Search user `ft` entries
211
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 262 times.
326 for (size_t i = 0, n = filetypes->count; i < n; i++) {
212 64 const UserFileTypeEntry *ft = filetypes->ptrs[i];
213
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 59 times.
64 if (ft_match(ft, params[ft->type])) {
214 5 return ft->name;
215 }
216 }
217
218 // Search built-in lookup tables
219 static_assert(ARRAYLEN(funcs) == ARRAYLEN(params));
220
2/2
✓ Branch 0 taken 863 times.
✓ Branch 1 taken 86 times.
949 for (FileDetectionType i = 0; i < ARRAYLEN(funcs); i++) {
221 863 BUG_ON(!funcs[i]);
222 863 FileTypeEnum ft = funcs[i](params[i]);
223
2/2
✓ Branch 0 taken 176 times.
✓ Branch 1 taken 687 times.
863 if (ft != NONE) {
224 176 return builtin_filetype_names[ft];
225 }
226 }
227
228 // Use "ini" filetype if first line looks like an ini [section]
229 86 strview_trim_right(&line);
230
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 41 times.
86 if (line.length >= 4) {
231 45 const char *s = line.data;
232 45 const size_t n = line.length;
233
6/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 40 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 1 times.
45 if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) {
234
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (!strview_contains_char_type(&line, ASCII_CNTRL)) {
235 return builtin_filetype_names[INI];
236 }
237 }
238 }
239
240
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 82 times.
84 if (strview_equal_cstring(&ext, "conf")) {
241
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (strview_has_prefix(&path, "/etc/systemd/")) {
242 2 return builtin_filetype_names[INI];
243 }
244 1 BUG_ON(!filename);
245 1 const StringView dir = path_slice_dirname(filename);
246
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (
247 1 strview_has_prefix(&path, "/etc/")
248 || strview_has_prefix(&path, "/usr/share/")
249 || strview_has_prefix(&path, "/usr/local/share/")
250 || strview_has_suffix(&dir, "/tmpfiles.d")
251 ) {
252 return builtin_filetype_names[CONFIG];
253 }
254 }
255
256 return NULL;
257 }
258
259 30 bool is_ft(const PointerArray *filetypes, const char *name)
260 {
261
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 16 times.
30 if (BSEARCH(name, builtin_filetype_names, vstrcmp)) {
262 return true;
263 }
264
265
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 8 times.
34 for (size_t i = 0, n = filetypes->count; i < n; i++) {
266 26 const UserFileTypeEntry *ft = filetypes->ptrs[i];
267
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 6 times.
26 if (streq(ft->name, name)) {
268 return true;
269 }
270 }
271
272 return false;
273 }
274
275 2 void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix)
276 {
277 2 COLLECT_STRINGS(builtin_filetype_names, a, prefix);
278
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 for (size_t i = 0, n = filetypes->count; i < n; i++) {
279 const UserFileTypeEntry *ft = filetypes->ptrs[i];
280 const char *name = ft->name;
281 if (str_has_prefix(name, prefix)) {
282 ptr_array_append(a, xstrdup(name));
283 }
284 }
285 2 }
286
287 3 static const char *ft_get_str(const UserFileTypeEntry *ft)
288 {
289
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str;
290 }
291
292 1 String dump_filetypes(const PointerArray *filetypes)
293 {
294 1 static const char flags[][4] = {
295 [FT_EXTENSION] = "",
296 [FT_FILENAME] = "-f ",
297 [FT_CONTENT] = "-c ",
298 [FT_INTERPRETER] = "-i ",
299 [FT_BASENAME] = "-b ",
300 };
301
302 1 String s = string_new(4096);
303
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 for (size_t i = 0, n = filetypes->count; i < n; i++) {
304 3 const UserFileTypeEntry *ft = filetypes->ptrs[i];
305 3 BUG_ON(ft->type >= ARRAYLEN(flags));
306 3 BUG_ON(ft->name[0] == '-');
307 3 string_append_literal(&s, "ft ");
308 3 string_append_cstring(&s, flags[ft->type]);
309 3 string_append_escaped_arg(&s, ft->name, true);
310 3 string_append_byte(&s, ' ');
311 3 string_append_escaped_arg(&s, ft_get_str(ft), true);
312 3 string_append_byte(&s, '\n');
313 }
314 1 return s;
315 }
316
317 18 static void free_filetype_entry(UserFileTypeEntry *ft)
318 {
319
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 14 times.
18 if (!ft_uses_regex(ft->type)) {
320 4 free(ft->u.str);
321 }
322 18 free(ft);
323 18 }
324
325 9 void free_filetypes(PointerArray *filetypes)
326 {
327 9 ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry));
328 9 }
329
330 2501 bool is_valid_filetype_name_sv(const StringView *name)
331 {
332 2501 const char *data = name->data;
333 2501 const size_t len = name->length;
334
4/4
✓ Branch 0 taken 2492 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 2489 times.
✓ Branch 3 taken 3 times.
2501 if (unlikely(len == 0 || len > FILETYPE_NAME_MAX || data[0] == '-')) {
335 return false;
336 }
337
338 const AsciiCharType mask = ASCII_SPACE | ASCII_CNTRL;
339
2/2
✓ Branch 0 taken 12563 times.
✓ Branch 1 taken 2479 times.
15042 for (size_t i = 0; i < len; i++) {
340 12563 unsigned char ch = data[i];
341
4/4
✓ Branch 0 taken 12555 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 12553 times.
✓ Branch 3 taken 2 times.
12563 if (unlikely(ascii_test(ch, mask) || ch == '/')) {
342 return false;
343 }
344 }
345
346 return true;
347 }
348