Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <stdint.h> | ||
2 | #include <stdlib.h> | ||
3 | #include "filetype.h" | ||
4 | #include "command/serialize.h" | ||
5 | #include "regexp.h" | ||
6 | #include "util/array.h" | ||
7 | #include "util/ascii.h" | ||
8 | #include "util/bsearch.h" | ||
9 | #include "util/debug.h" | ||
10 | #include "util/hashset.h" | ||
11 | #include "util/log.h" | ||
12 | #include "util/path.h" | ||
13 | #include "util/str-util.h" | ||
14 | #include "util/strtonum.h" | ||
15 | #include "util/xmalloc.h" | ||
16 | #include "util/xmemmem.h" | ||
17 | |||
18 | 1972 | static int ft_compare(const void *key, const void *elem) | |
19 | { | ||
20 | 1972 | const StringView *sv = key; | |
21 | 1972 | const char *ext = elem; // Cast to first member of struct | |
22 | 1972 | int res = memcmp(sv->data, ext, sv->length); | |
23 |
4/4✓ Branch 0 (2→3) taken 152 times.
✓ Branch 1 (2→5) taken 1820 times.
✓ Branch 2 (3→4) taken 13 times.
✓ Branch 3 (3→5) taken 139 times.
|
1972 | if (unlikely(res == 0 && ext[sv->length] != '\0')) { |
24 | 13 | res = -1; | |
25 | } | ||
26 | 1972 | return res; | |
27 | } | ||
28 | |||
29 | // Built-in filetypes | ||
30 | // NOLINTBEGIN(bugprone-suspicious-include) | ||
31 | #include "filetype/names.c" | ||
32 | #include "filetype/basenames.c" | ||
33 | #include "filetype/directories.c" | ||
34 | #include "filetype/extensions.c" | ||
35 | #include "filetype/interpreters.c" | ||
36 | #include "filetype/ignored-exts.c" | ||
37 | #include "filetype/signatures.c" | ||
38 | // NOLINTEND(bugprone-suspicious-include) | ||
39 | |||
40 | 18 | UNITTEST { | |
41 | 18 | static_assert(NR_BUILTIN_FILETYPES < 256); | |
42 | 18 | CHECK_BSEARCH_ARRAY(basenames, name, strcmp); | |
43 | 18 | CHECK_BSEARCH_ARRAY(extensions, ext, strcmp); | |
44 | 18 | CHECK_BSEARCH_ARRAY(interpreters, key, strcmp); | |
45 | 18 | CHECK_BSEARCH_ARRAY(emacs_modes, name, strcmp); | |
46 | 18 | CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp); | |
47 | 18 | CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp); | |
48 | |||
49 |
2/2✓ Branch 0 (12→9) taken 2178 times.
✓ Branch 1 (12→13) taken 18 times.
|
2196 | for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) { |
50 | 2178 | const char *name = builtin_filetype_names[i]; | |
51 |
1/2✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→11) taken 2178 times.
|
2178 | if (unlikely(!is_valid_filetype_name(name))) { |
52 | − | BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name); | |
53 | } | ||
54 | } | ||
55 | 18 | } | |
56 | |||
57 | typedef struct { | ||
58 | unsigned int str_len; | ||
59 | char str[] COUNTED_BY(str_len); | ||
60 | } FlexArrayStr; | ||
61 | |||
62 | // Filetypes dynamically added via the `ft` command. | ||
63 | // Not grouped by name to make it possible to order them freely. | ||
64 | typedef struct { | ||
65 | union { | ||
66 | FlexArrayStr *str; | ||
67 | const InternedRegexp *regexp; | ||
68 | } u; | ||
69 | uint8_t type; // FileDetectionType | ||
70 | char name[]; | ||
71 | } UserFileTypeEntry; | ||
72 | |||
73 | 121 | static bool ft_uses_regex(FileDetectionType type) | |
74 | { | ||
75 | 121 | return type == FT_CONTENT || type == FT_FILENAME; | |
76 | } | ||
77 | |||
78 | 18 | bool add_filetype ( | |
79 | PointerArray *filetypes, | ||
80 | const char *name, | ||
81 | const char *str, | ||
82 | FileDetectionType type, | ||
83 | ErrorBuffer *ebuf | ||
84 | ) { | ||
85 | 18 | BUG_ON(!is_valid_filetype_name(name)); | |
86 | 18 | const InternedRegexp *ir = NULL; | |
87 |
2/2✓ Branch 0 (4→5) taken 14 times.
✓ Branch 1 (4→7) taken 4 times.
|
18 | if (ft_uses_regex(type)) { |
88 | 14 | ir = regexp_intern(ebuf, str); | |
89 |
1/2✓ Branch 0 (6→7) taken 14 times.
✗ Branch 1 (6→14) not taken.
|
14 | if (unlikely(!ir)) { |
90 | return false; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | 18 | size_t name_len = strlen(name); | |
95 | 18 | UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1); | |
96 | 18 | ft->type = type; | |
97 | |||
98 |
2/2✓ Branch 0 (8→9) taken 14 times.
✓ Branch 1 (8→10) taken 4 times.
|
18 | if (ir) { |
99 | 14 | ft->u.regexp = ir; | |
100 | } else { | ||
101 | 4 | size_t str_len = strlen(str); | |
102 | 4 | FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1); | |
103 | 4 | s->str_len = str_len; | |
104 | 4 | ft->u.str = s; | |
105 | 4 | memcpy(s->str, str, str_len + 1); | |
106 | } | ||
107 | |||
108 | 18 | memcpy(ft->name, name, name_len + 1); | |
109 | 18 | ptr_array_append(filetypes, ft); | |
110 | 18 | return true; | |
111 | } | ||
112 | |||
113 | 294 | static StringView path_extension(StringView filename) | |
114 | { | ||
115 | 294 | StringView ext = filename; | |
116 | 294 | ssize_t pos = strview_memrchr_idx(&ext, '.'); | |
117 |
2/2✓ Branch 0 (2→3) taken 116 times.
✓ Branch 1 (2→4) taken 178 times.
|
294 | strview_remove_prefix(&ext, pos > 0 ? pos + 1 : ext.length); |
118 | 294 | return ext; | |
119 | } | ||
120 | |||
121 | 279 | static StringView get_filename_extension(StringView filename) | |
122 | { | ||
123 | 279 | StringView ext = path_extension(filename); | |
124 |
2/2✓ Branch 0 (4→5) taken 15 times.
✓ Branch 1 (4→6) taken 264 times.
|
279 | if (is_ignored_extension(ext)) { |
125 | 15 | filename.length -= ext.length + 1; | |
126 | 15 | ext = path_extension(filename); | |
127 | } | ||
128 | 279 | strview_remove_matching_suffix(&ext, "~"); | |
129 | 279 | return ext; | |
130 | } | ||
131 | |||
132 | // Parse hashbang and return interpreter name, without version number. | ||
133 | // For example, if line is "#!/usr/bin/env python2", "python" is returned. | ||
134 | 279 | static StringView get_interpreter(StringView line) | |
135 | { | ||
136 | 279 | StringView sv = STRING_VIEW_INIT; | |
137 |
2/2✓ Branch 0 (3→4) taken 208 times.
✓ Branch 1 (3→5) taken 71 times.
|
279 | if (!strview_remove_matching_prefix(&line, "#!")) { |
138 | 208 | return sv; | |
139 | } | ||
140 | |||
141 | 71 | strview_trim_left(&line); | |
142 |
3/4✓ Branch 0 (6→7) taken 71 times.
✗ Branch 1 (6→8) not taken.
✓ Branch 2 (7→8) taken 1 times.
✓ Branch 3 (7→9) taken 70 times.
|
71 | if (line.length < 2 || line.data[0] != '/') { |
143 | 1 | return sv; | |
144 | } | ||
145 | |||
146 | 70 | size_t pos = 0; | |
147 | 70 | sv = get_delim(line.data, &pos, line.length, ' '); | |
148 |
4/4✓ Branch 0 (10→11) taken 8 times.
✓ Branch 1 (10→17) taken 62 times.
✓ Branch 2 (12→14) taken 6 times.
✓ Branch 3 (12→17) taken 2 times.
|
70 | if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) { |
149 |
4/4✓ Branch 0 (14→15) taken 6 times.
✓ Branch 1 (14→16) taken 1 times.
✓ Branch 2 (15→13) taken 1 times.
✓ Branch 3 (15→16) taken 5 times.
|
7 | while (pos + 1 < line.length && line.data[pos] == ' ') { |
150 | 1 | pos++; | |
151 | } | ||
152 | 6 | sv = get_delim(line.data, &pos, line.length, ' '); | |
153 | } | ||
154 | |||
155 | 70 | ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/'); | |
156 |
2/2✓ Branch 0 (17→18) taken 65 times.
✓ Branch 1 (17→20) taken 5 times.
|
70 | if (last_slash_idx >= 0) { |
157 | 65 | strview_remove_prefix(&sv, last_slash_idx + 1); | |
158 | } | ||
159 | |||
160 |
4/4✓ Branch 0 (21→22) taken 81 times.
✓ Branch 1 (21→23) taken 5 times.
✓ Branch 2 (22→19) taken 16 times.
✓ Branch 3 (22→23) taken 65 times.
|
86 | while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) { |
161 | 16 | sv.length--; | |
162 | } | ||
163 | |||
164 | 70 | return sv; | |
165 | } | ||
166 | |||
167 | 24 | static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv) | |
168 | { | ||
169 | 24 | const FlexArrayStr *s = ft->u.str; | |
170 |
4/4✓ Branch 0 (2→3) taken 15 times.
✓ Branch 1 (2→6) taken 9 times.
✓ Branch 2 (4→5) taken 12 times.
✓ Branch 3 (4→6) taken 3 times.
|
24 | return sv.length > 0 && strview_equal_strn(&sv, s->str, s->str_len); |
171 | } | ||
172 | |||
173 | 58 | static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv) | |
174 | { | ||
175 | 58 | const regex_t *re = &ft->u.regexp->re; | |
176 | 58 | regmatch_t m; | |
177 |
4/4✓ Branch 0 (2→3) taken 52 times.
✓ Branch 1 (2→6) taken 6 times.
✓ Branch 2 (4→5) taken 50 times.
✓ Branch 3 (4→6) taken 2 times.
|
58 | return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0); |
178 | } | ||
179 | |||
180 | 82 | static bool ft_match(const UserFileTypeEntry *ft, const StringView sv) | |
181 | { | ||
182 | 82 | FileDetectionType t = ft->type; | |
183 |
2/2✓ Branch 0 (2→3) taken 58 times.
✓ Branch 1 (2→4) taken 24 times.
|
82 | return ft_uses_regex(t) ? ft_regex_match(ft, sv) : ft_str_match(ft, sv); |
184 | } | ||
185 | |||
186 | typedef FileTypeEnum (*FileTypeLookupFunc)(const StringView sv); | ||
187 | |||
188 | 279 | const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line) | |
189 | { | ||
190 |
2/2✓ Branch 0 (2→3) taken 153 times.
✓ Branch 1 (2→4) taken 126 times.
|
279 | const char *b = filename ? path_basename(filename) : NULL; |
191 | 279 | const StringView base = strview_from_cstring(b); | |
192 | 279 | const StringView ext = get_filename_extension(base); | |
193 | 279 | const StringView path = strview_from_cstring(filename); | |
194 | 279 | const StringView interpreter = get_interpreter(line); | |
195 | 279 | BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0)); | |
196 | 279 | BUG_ON(line.length == 0 && interpreter.length != 0); | |
197 | |||
198 | // The order of elements in this array determines the order of | ||
199 | // precedence for the lookup() functions (but note that changing | ||
200 | // the initializer below makes no difference to the array order) | ||
201 | 279 | static const FileTypeLookupFunc funcs[] = { | |
202 | [FT_INTERPRETER] = filetype_from_interpreter, | ||
203 | [FT_BASENAME] = filetype_from_basename, | ||
204 | [FT_CONTENT] = filetype_from_signature, | ||
205 | [FT_EXTENSION] = filetype_from_extension, | ||
206 | [FT_FILENAME] = filetype_from_dir_prefix, | ||
207 | }; | ||
208 | |||
209 | 279 | const StringView params[] = { | |
210 | [FT_INTERPRETER] = interpreter, | ||
211 | [FT_BASENAME] = base, | ||
212 | [FT_CONTENT] = line, | ||
213 | [FT_EXTENSION] = ext, | ||
214 | [FT_FILENAME] = path, | ||
215 | }; | ||
216 | |||
217 | // Search user `ft` entries | ||
218 |
2/2✓ Branch 0 (18→14) taken 82 times.
✓ Branch 1 (18→25) taken 274 times.
|
356 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
219 | 82 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
220 |
2/2✓ Branch 0 (15→16) taken 5 times.
✓ Branch 1 (15→17) taken 77 times.
|
82 | if (ft_match(ft, params[ft->type])) { |
221 | 5 | return ft->name; | |
222 | } | ||
223 | } | ||
224 | |||
225 | // Search built-in lookup tables | ||
226 | static_assert(ARRAYLEN(funcs) == ARRAYLEN(params)); | ||
227 |
2/2✓ Branch 0 (25→19) taken 916 times.
✓ Branch 1 (25→26) taken 94 times.
|
1010 | for (FileDetectionType i = 0; i < ARRAYLEN(funcs); i++) { |
228 | 916 | BUG_ON(!funcs[i]); | |
229 | 916 | FileTypeEnum ft = funcs[i](params[i]); | |
230 |
2/2✓ Branch 0 (22→23) taken 180 times.
✓ Branch 1 (22→24) taken 736 times.
|
916 | if (ft != NONE) { |
231 | 180 | return builtin_filetype_names[ft]; | |
232 | } | ||
233 | } | ||
234 | |||
235 | // Use "ini" filetype if first line looks like an ini [section] | ||
236 | 94 | strview_trim_right(&line); | |
237 |
2/2✓ Branch 0 (27→28) taken 54 times.
✓ Branch 1 (27→32) taken 40 times.
|
94 | if (line.length >= 4) { |
238 | 54 | const char *s = line.data; | |
239 | 54 | const size_t n = line.length; | |
240 |
6/6✓ Branch 0 (28→29) taken 5 times.
✓ Branch 1 (28→32) taken 49 times.
✓ Branch 2 (29→30) taken 3 times.
✓ Branch 3 (29→32) taken 2 times.
✓ Branch 4 (30→31) taken 2 times.
✓ Branch 5 (30→32) taken 1 times.
|
54 | if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) { |
241 |
1/2✗ Branch 0 (31→32) not taken.
✓ Branch 1 (31→48) taken 2 times.
|
2 | if (!strview_contains_char_type(&line, ASCII_CNTRL)) { |
242 | return builtin_filetype_names[INI]; | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | |||
247 |
2/2✓ Branch 0 (33→34) taken 2 times.
✓ Branch 1 (33→48) taken 90 times.
|
92 | if (strview_equal_cstring(&ext, "conf")) { |
248 |
2/2✓ Branch 0 (35→36) taken 1 times.
✓ Branch 1 (35→47) taken 1 times.
|
2 | if (strview_has_prefix(&path, "/etc/systemd/")) { |
249 | 2 | return builtin_filetype_names[INI]; | |
250 | } | ||
251 | 1 | BUG_ON(!filename); | |
252 | 1 | const StringView dir = path_slice_dirname(filename); | |
253 |
1/2✗ Branch 0 (39→40) not taken.
✓ Branch 1 (39→47) taken 1 times.
|
1 | if ( |
254 | 1 | strview_has_prefix(&path, "/etc/") | |
255 | ✗ | || strview_has_prefix(&path, "/usr/share/") | |
256 | ✗ | || strview_has_prefix(&path, "/usr/local/share/") | |
257 | ✗ | || strview_has_suffix(&dir, "/tmpfiles.d") | |
258 | ) { | ||
259 | return builtin_filetype_names[CONFIG]; | ||
260 | } | ||
261 | } | ||
262 | |||
263 | return NULL; | ||
264 | } | ||
265 | |||
266 | 30 | bool is_ft(const PointerArray *filetypes, const char *name) | |
267 | { | ||
268 |
2/2✓ Branch 0 (3→4) taken 14 times.
✓ Branch 1 (3→8) taken 16 times.
|
30 | if (BSEARCH(name, builtin_filetype_names, vstrcmp)) { |
269 | return true; | ||
270 | } | ||
271 | |||
272 |
2/2✓ Branch 0 (7→5) taken 26 times.
✓ Branch 1 (7→8) taken 8 times.
|
34 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
273 | 26 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
274 |
2/2✓ Branch 0 (5→6) taken 20 times.
✓ Branch 1 (5→8) taken 6 times.
|
26 | if (streq(ft->name, name)) { |
275 | return true; | ||
276 | } | ||
277 | } | ||
278 | |||
279 | return false; | ||
280 | } | ||
281 | |||
282 | 2 | void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix) | |
283 | { | ||
284 | // Insert all filetype names beginning with `prefix` into a HashSet | ||
285 | // (to avoid duplicates) | ||
286 | 2 | HashSet set; | |
287 | 2 | size_t prefix_len = strlen(prefix); | |
288 | 2 | size_t nr_builtin_ft = ARRAYLEN(builtin_filetype_names); | |
289 |
1/2✓ Branch 0 (2→3) taken 2 times.
✗ Branch 1 (2→4) not taken.
|
2 | hashset_init(&set, 20 + (prefix[0] == '\0' ? nr_builtin_ft : 0), false); |
290 | |||
291 |
2/2✓ Branch 0 (9→6) taken 242 times.
✓ Branch 1 (9→10) taken 2 times.
|
244 | for (size_t i = 0; i < nr_builtin_ft; i++) { |
292 | 242 | const char *name = builtin_filetype_names[i]; | |
293 |
2/2✓ Branch 0 (6→7) taken 2 times.
✓ Branch 1 (6→8) taken 240 times.
|
242 | if (str_has_strn_prefix(name, prefix, prefix_len)) { |
294 | 2 | hashset_insert(&set, name, strlen(name)); | |
295 | } | ||
296 | } | ||
297 | |||
298 |
1/2✗ Branch 0 (14→11) not taken.
✓ Branch 1 (14→15) taken 2 times.
|
2 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
299 | ✗ | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
300 | ✗ | const char *name = ft->name; | |
301 | ✗ | if (str_has_strn_prefix(name, prefix, prefix_len)) { | |
302 | ✗ | hashset_insert(&set, name, strlen(name)); | |
303 | } | ||
304 | } | ||
305 | |||
306 | // Append the collected strings to the PointerArray | ||
307 |
2/2✓ Branch 0 (19→16) taken 2 times.
✓ Branch 1 (19→20) taken 2 times.
|
4 | for (HashSetIter iter = hashset_iter(&set); hashset_next(&iter); ) { |
308 | 2 | ptr_array_append(a, xmemdup(iter.entry->str, iter.entry->str_len + 1)); | |
309 | } | ||
310 | |||
311 | 2 | hashset_free(&set); | |
312 | 2 | } | |
313 | |||
314 | 3 | static const char *ft_get_str(const UserFileTypeEntry *ft) | |
315 | { | ||
316 |
2/2✓ Branch 0 (2→3) taken 2 times.
✓ Branch 1 (2→4) taken 1 times.
|
3 | return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str; |
317 | } | ||
318 | |||
319 | 1 | String dump_filetypes(const PointerArray *filetypes) | |
320 | { | ||
321 | 1 | static const char flags[][4] = { | |
322 | [FT_EXTENSION] = "", | ||
323 | [FT_FILENAME] = "-f ", | ||
324 | [FT_CONTENT] = "-c ", | ||
325 | [FT_INTERPRETER] = "-i ", | ||
326 | [FT_BASENAME] = "-b ", | ||
327 | }; | ||
328 | |||
329 | 1 | String s = string_new(4096); | |
330 |
2/2✓ Branch 0 (15→4) taken 3 times.
✓ Branch 1 (15→16) taken 1 times.
|
4 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
331 | 3 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
332 | 3 | BUG_ON(ft->type >= ARRAYLEN(flags)); | |
333 | 3 | BUG_ON(ft->name[0] == '-'); | |
334 | 3 | string_append_literal(&s, "ft "); | |
335 | 3 | string_append_cstring(&s, flags[ft->type]); | |
336 | 3 | string_append_escaped_arg(&s, ft->name, true); | |
337 | 3 | string_append_byte(&s, ' '); | |
338 | 3 | string_append_escaped_arg(&s, ft_get_str(ft), true); | |
339 | 3 | string_append_byte(&s, '\n'); | |
340 | } | ||
341 | 1 | return s; | |
342 | } | ||
343 | |||
344 | 18 | static void free_filetype_entry(UserFileTypeEntry *ft) | |
345 | { | ||
346 |
2/2✓ Branch 0 (2→3) taken 4 times.
✓ Branch 1 (2→4) taken 14 times.
|
18 | if (!ft_uses_regex(ft->type)) { |
347 | 4 | free(ft->u.str); | |
348 | } | ||
349 | 18 | free(ft); | |
350 | 18 | } | |
351 | |||
352 | 9 | void free_filetypes(PointerArray *filetypes) | |
353 | { | ||
354 | 9 | ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry)); | |
355 | 9 | } | |
356 | |||
357 | 2580 | bool is_valid_filetype_name_sv(const StringView *name) | |
358 | { | ||
359 | 2580 | const char *data = name->data; | |
360 | 2580 | const size_t len = name->length; | |
361 |
4/4✓ Branch 0 (2→3) taken 2571 times.
✓ Branch 1 (2→8) taken 9 times.
✓ Branch 2 (3→7) taken 2568 times.
✓ Branch 3 (3→8) taken 3 times.
|
2580 | if (unlikely(len == 0 || len > FILETYPE_NAME_MAX || data[0] == '-')) { |
362 | return false; | ||
363 | } | ||
364 | |||
365 | const AsciiCharType mask = ASCII_SPACE | ASCII_CNTRL; | ||
366 |
2/2✓ Branch 0 (7→4) taken 13068 times.
✓ Branch 1 (7→8) taken 2558 times.
|
15626 | for (size_t i = 0; i < len; i++) { |
367 | 13068 | unsigned char ch = data[i]; | |
368 |
4/4✓ Branch 0 (4→5) taken 13060 times.
✓ Branch 1 (4→8) taken 8 times.
✓ Branch 2 (5→6) taken 13058 times.
✓ Branch 3 (5→8) taken 2 times.
|
13068 | if (unlikely(ascii_test(ch, mask) || ch == '/')) { |
369 | return false; | ||
370 | } | ||
371 | } | ||
372 | |||
373 | return true; | ||
374 | } | ||
375 |