Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <stdint.h> | ||
2 | #include <stdlib.h> | ||
3 | #include "filetype.h" | ||
4 | #include "command/serialize.h" | ||
5 | #include "regexp.h" | ||
6 | #include "util/array.h" | ||
7 | #include "util/ascii.h" | ||
8 | #include "util/bsearch.h" | ||
9 | #include "util/debug.h" | ||
10 | #include "util/log.h" | ||
11 | #include "util/path.h" | ||
12 | #include "util/str-util.h" | ||
13 | #include "util/xmalloc.h" | ||
14 | #include "util/xmemmem.h" | ||
15 | |||
16 | 1853 | static int ft_compare(const void *key, const void *elem) | |
17 | { | ||
18 | 1853 | const StringView *sv = key; | |
19 | 1853 | const char *ext = elem; // Cast to first member of struct | |
20 | 1853 | int res = memcmp(sv->data, ext, sv->length); | |
21 |
4/4✓ Branch 0 taken 151 times.
✓ Branch 1 taken 1702 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 139 times.
|
1853 | if (unlikely(res == 0 && ext[sv->length] != '\0')) { |
22 | 12 | res = -1; | |
23 | } | ||
24 | 1853 | return res; | |
25 | } | ||
26 | |||
27 | // Built-in filetypes | ||
28 | // NOLINTBEGIN(bugprone-suspicious-include) | ||
29 | #include "filetype/names.c" | ||
30 | #include "filetype/basenames.c" | ||
31 | #include "filetype/directories.c" | ||
32 | #include "filetype/extensions.c" | ||
33 | #include "filetype/interpreters.c" | ||
34 | #include "filetype/ignored-exts.c" | ||
35 | #include "filetype/signatures.c" | ||
36 | // NOLINTEND(bugprone-suspicious-include) | ||
37 | |||
38 | 18 | UNITTEST { | |
39 | 18 | static_assert(NR_BUILTIN_FILETYPES < 256); | |
40 | 18 | CHECK_BSEARCH_ARRAY(basenames, name, strcmp); | |
41 | 18 | CHECK_BSEARCH_ARRAY(extensions, ext, strcmp); | |
42 | 18 | CHECK_BSEARCH_ARRAY(interpreters, key, strcmp); | |
43 | 18 | CHECK_BSEARCH_ARRAY(emacs_modes, name, strcmp); | |
44 | 18 | CHECK_BSEARCH_STR_ARRAY(ignored_extensions, strcmp); | |
45 | 18 | CHECK_BSEARCH_STR_ARRAY(builtin_filetype_names, strcmp); | |
46 | |||
47 |
2/2✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 18 times.
|
2142 | for (size_t i = 0; i < ARRAYLEN(builtin_filetype_names); i++) { |
48 | 2124 | const char *name = builtin_filetype_names[i]; | |
49 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2124 times.
|
2124 | if (unlikely(!is_valid_filetype_name(name))) { |
50 | − | BUG("invalid name at builtin_filetype_names[%zu]: \"%s\"", i, name); | |
51 | } | ||
52 | } | ||
53 | 18 | } | |
54 | |||
55 | typedef struct { | ||
56 | unsigned int str_len; | ||
57 | char str[] COUNTED_BY(str_len); | ||
58 | } FlexArrayStr; | ||
59 | |||
60 | // Filetypes dynamically added via the `ft` command. | ||
61 | // Not grouped by name to make it possible to order them freely. | ||
62 | typedef struct { | ||
63 | union { | ||
64 | FlexArrayStr *str; | ||
65 | const InternedRegexp *regexp; | ||
66 | } u; | ||
67 | uint8_t type; // FileDetectionType | ||
68 | char name[]; | ||
69 | } UserFileTypeEntry; | ||
70 | |||
71 | 103 | static bool ft_uses_regex(FileDetectionType type) | |
72 | { | ||
73 | 103 | return type == FT_CONTENT || type == FT_FILENAME; | |
74 | } | ||
75 | |||
76 | 18 | bool add_filetype(PointerArray *filetypes, const char *name, const char *str, FileDetectionType type) | |
77 | { | ||
78 | 18 | BUG_ON(!is_valid_filetype_name(name)); | |
79 | 18 | const InternedRegexp *ir = NULL; | |
80 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | if (ft_uses_regex(type)) { |
81 | 14 | ir = regexp_intern(str); | |
82 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | if (unlikely(!ir)) { |
83 | return false; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | 18 | size_t name_len = strlen(name); | |
88 | 18 | UserFileTypeEntry *ft = xmalloc(sizeof(*ft) + name_len + 1); | |
89 | 18 | ft->type = type; | |
90 | |||
91 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | if (ir) { |
92 | 14 | ft->u.regexp = ir; | |
93 | } else { | ||
94 | 4 | size_t str_len = strlen(str); | |
95 | 4 | FlexArrayStr *s = xmalloc(sizeof(*s) + str_len + 1); | |
96 | 4 | s->str_len = str_len; | |
97 | 4 | ft->u.str = s; | |
98 | 4 | memcpy(s->str, str, str_len + 1); | |
99 | } | ||
100 | |||
101 | 18 | memcpy(ft->name, name, name_len + 1); | |
102 | 18 | ptr_array_append(filetypes, ft); | |
103 | 18 | return true; | |
104 | } | ||
105 | |||
106 | 282 | static StringView path_extension(StringView filename) | |
107 | { | ||
108 | 282 | StringView ext = filename; | |
109 | 282 | ssize_t pos = strview_memrchr_idx(&ext, '.'); | |
110 |
2/2✓ Branch 0 taken 110 times.
✓ Branch 1 taken 172 times.
|
282 | strview_remove_prefix(&ext, pos > 0 ? pos + 1 : ext.length); |
111 | 282 | return ext; | |
112 | } | ||
113 | |||
114 | 267 | static StringView get_filename_extension(StringView filename) | |
115 | { | ||
116 | 267 | StringView ext = path_extension(filename); | |
117 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 252 times.
|
267 | if (is_ignored_extension(ext)) { |
118 | 15 | filename.length -= ext.length + 1; | |
119 | 15 | ext = path_extension(filename); | |
120 | } | ||
121 | 267 | strview_remove_matching_suffix(&ext, "~"); | |
122 | 267 | return ext; | |
123 | } | ||
124 | |||
125 | // Parse hashbang and return interpreter name, without version number. | ||
126 | // For example, if line is "#!/usr/bin/env python2", "python" is returned. | ||
127 | 267 | static StringView get_interpreter(StringView line) | |
128 | { | ||
129 | 267 | StringView sv = STRING_VIEW_INIT; | |
130 |
2/2✓ Branch 0 taken 196 times.
✓ Branch 1 taken 71 times.
|
267 | if (!strview_remove_matching_prefix(&line, "#!")) { |
131 | 196 | return sv; | |
132 | } | ||
133 | |||
134 | 71 | strview_trim_left(&line); | |
135 |
3/4✓ Branch 0 taken 71 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 70 times.
|
71 | if (line.length < 2 || line.data[0] != '/') { |
136 | 1 | return sv; | |
137 | } | ||
138 | |||
139 | 70 | size_t pos = 0; | |
140 | 70 | sv = get_delim(line.data, &pos, line.length, ' '); | |
141 |
4/4✓ Branch 0 taken 8 times.
✓ Branch 1 taken 62 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 2 times.
|
70 | if (pos < line.length && strview_equal_cstring(&sv, "/usr/bin/env")) { |
142 |
4/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 5 times.
|
7 | while (pos + 1 < line.length && line.data[pos] == ' ') { |
143 | 1 | pos++; | |
144 | } | ||
145 | 6 | sv = get_delim(line.data, &pos, line.length, ' '); | |
146 | } | ||
147 | |||
148 | 70 | ssize_t last_slash_idx = strview_memrchr_idx(&sv, '/'); | |
149 |
2/2✓ Branch 0 taken 65 times.
✓ Branch 1 taken 5 times.
|
70 | if (last_slash_idx >= 0) { |
150 | 65 | strview_remove_prefix(&sv, last_slash_idx + 1); | |
151 | } | ||
152 | |||
153 |
4/4✓ Branch 0 taken 81 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 65 times.
|
86 | while (sv.length && ascii_is_digit_or_dot(sv.data[sv.length - 1])) { |
154 | 16 | sv.length--; | |
155 | } | ||
156 | |||
157 | 70 | return sv; | |
158 | } | ||
159 | |||
160 | 18 | static bool ft_str_match(const UserFileTypeEntry *ft, const StringView sv) | |
161 | { | ||
162 | 18 | const FlexArrayStr *s = ft->u.str; | |
163 |
4/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3 times.
|
18 | return sv.length > 0 && strview_equal_strn(&sv, s->str, s->str_len); |
164 | } | ||
165 | |||
166 | 46 | static bool ft_regex_match(const UserFileTypeEntry *ft, const StringView sv) | |
167 | { | ||
168 | 46 | const regex_t *re = &ft->u.regexp->re; | |
169 | 46 | regmatch_t m; | |
170 |
4/4✓ Branch 0 taken 40 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 38 times.
✓ Branch 3 taken 2 times.
|
46 | return sv.length > 0 && regexp_exec(re, sv.data, sv.length, 0, &m, 0); |
171 | } | ||
172 | |||
173 | 64 | static bool ft_match(const UserFileTypeEntry *ft, const StringView sv) | |
174 | { | ||
175 | 64 | FileDetectionType t = ft->type; | |
176 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 18 times.
|
64 | return ft_uses_regex(t) ? ft_regex_match(ft, sv) : ft_str_match(ft, sv); |
177 | } | ||
178 | |||
179 | typedef FileTypeEnum (*FileTypeLookupFunc)(const StringView sv); | ||
180 | |||
181 | 267 | const char *find_ft(const PointerArray *filetypes, const char *filename, StringView line) | |
182 | { | ||
183 |
2/2✓ Branch 0 taken 147 times.
✓ Branch 1 taken 120 times.
|
267 | const char *b = filename ? path_basename(filename) : NULL; |
184 | 267 | const StringView base = strview_from_cstring(b); | |
185 | 267 | const StringView ext = get_filename_extension(base); | |
186 | 267 | const StringView path = strview_from_cstring(filename); | |
187 | 267 | const StringView interpreter = get_interpreter(line); | |
188 | 267 | BUG_ON(path.length == 0 && (base.length != 0 || ext.length != 0)); | |
189 | 267 | BUG_ON(line.length == 0 && interpreter.length != 0); | |
190 | |||
191 | // The order of elements in this array determines the order of | ||
192 | // precedence for the lookup() functions (but note that changing | ||
193 | // the initializer below makes no difference to the array order) | ||
194 | 267 | static const FileTypeLookupFunc funcs[] = { | |
195 | [FT_INTERPRETER] = filetype_from_interpreter, | ||
196 | [FT_BASENAME] = filetype_from_basename, | ||
197 | [FT_CONTENT] = filetype_from_signature, | ||
198 | [FT_EXTENSION] = filetype_from_extension, | ||
199 | [FT_FILENAME] = filetype_from_dir_prefix, | ||
200 | }; | ||
201 | |||
202 | 267 | const StringView params[] = { | |
203 | [FT_INTERPRETER] = interpreter, | ||
204 | [FT_BASENAME] = base, | ||
205 | [FT_CONTENT] = line, | ||
206 | [FT_EXTENSION] = ext, | ||
207 | [FT_FILENAME] = path, | ||
208 | }; | ||
209 | |||
210 | // Search user `ft` entries | ||
211 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 262 times.
|
326 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
212 | 64 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
213 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 59 times.
|
64 | if (ft_match(ft, params[ft->type])) { |
214 | 5 | return ft->name; | |
215 | } | ||
216 | } | ||
217 | |||
218 | // Search built-in lookup tables | ||
219 | static_assert(ARRAYLEN(funcs) == ARRAYLEN(params)); | ||
220 |
2/2✓ Branch 0 taken 863 times.
✓ Branch 1 taken 86 times.
|
949 | for (FileDetectionType i = 0; i < ARRAYLEN(funcs); i++) { |
221 | 863 | BUG_ON(!funcs[i]); | |
222 | 863 | FileTypeEnum ft = funcs[i](params[i]); | |
223 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 687 times.
|
863 | if (ft != NONE) { |
224 | 176 | return builtin_filetype_names[ft]; | |
225 | } | ||
226 | } | ||
227 | |||
228 | // Use "ini" filetype if first line looks like an ini [section] | ||
229 | 86 | strview_trim_right(&line); | |
230 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 41 times.
|
86 | if (line.length >= 4) { |
231 | 45 | const char *s = line.data; | |
232 | 45 | const size_t n = line.length; | |
233 |
6/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 40 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 1 times.
|
45 | if (s[0] == '[' && s[n - 1] == ']' && is_word_byte(s[1])) { |
234 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (!strview_contains_char_type(&line, ASCII_CNTRL)) { |
235 | return builtin_filetype_names[INI]; | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | |||
240 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 82 times.
|
84 | if (strview_equal_cstring(&ext, "conf")) { |
241 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (strview_has_prefix(&path, "/etc/systemd/")) { |
242 | 2 | return builtin_filetype_names[INI]; | |
243 | } | ||
244 | 1 | BUG_ON(!filename); | |
245 | 1 | const StringView dir = path_slice_dirname(filename); | |
246 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if ( |
247 | 1 | strview_has_prefix(&path, "/etc/") | |
248 | ✗ | || strview_has_prefix(&path, "/usr/share/") | |
249 | ✗ | || strview_has_prefix(&path, "/usr/local/share/") | |
250 | ✗ | || strview_has_suffix(&dir, "/tmpfiles.d") | |
251 | ) { | ||
252 | return builtin_filetype_names[CONFIG]; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | return NULL; | ||
257 | } | ||
258 | |||
259 | 30 | bool is_ft(const PointerArray *filetypes, const char *name) | |
260 | { | ||
261 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 16 times.
|
30 | if (BSEARCH(name, builtin_filetype_names, vstrcmp)) { |
262 | return true; | ||
263 | } | ||
264 | |||
265 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 8 times.
|
34 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
266 | 26 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
267 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 6 times.
|
26 | if (streq(ft->name, name)) { |
268 | return true; | ||
269 | } | ||
270 | } | ||
271 | |||
272 | return false; | ||
273 | } | ||
274 | |||
275 | 2 | void collect_ft(const PointerArray *filetypes, PointerArray *a, const char *prefix) | |
276 | { | ||
277 | 2 | COLLECT_STRINGS(builtin_filetype_names, a, prefix); | |
278 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
279 | ✗ | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
280 | ✗ | const char *name = ft->name; | |
281 | ✗ | if (str_has_prefix(name, prefix)) { | |
282 | ✗ | ptr_array_append(a, xstrdup(name)); | |
283 | } | ||
284 | } | ||
285 | 2 | } | |
286 | |||
287 | 3 | static const char *ft_get_str(const UserFileTypeEntry *ft) | |
288 | { | ||
289 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | return ft_uses_regex(ft->type) ? ft->u.regexp->str : ft->u.str->str; |
290 | } | ||
291 | |||
292 | 1 | String dump_filetypes(const PointerArray *filetypes) | |
293 | { | ||
294 | 1 | static const char flags[][4] = { | |
295 | [FT_EXTENSION] = "", | ||
296 | [FT_FILENAME] = "-f ", | ||
297 | [FT_CONTENT] = "-c ", | ||
298 | [FT_INTERPRETER] = "-i ", | ||
299 | [FT_BASENAME] = "-b ", | ||
300 | }; | ||
301 | |||
302 | 1 | String s = string_new(4096); | |
303 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | for (size_t i = 0, n = filetypes->count; i < n; i++) { |
304 | 3 | const UserFileTypeEntry *ft = filetypes->ptrs[i]; | |
305 | 3 | BUG_ON(ft->type >= ARRAYLEN(flags)); | |
306 | 3 | BUG_ON(ft->name[0] == '-'); | |
307 | 3 | string_append_literal(&s, "ft "); | |
308 | 3 | string_append_cstring(&s, flags[ft->type]); | |
309 | 3 | string_append_escaped_arg(&s, ft->name, true); | |
310 | 3 | string_append_byte(&s, ' '); | |
311 | 3 | string_append_escaped_arg(&s, ft_get_str(ft), true); | |
312 | 3 | string_append_byte(&s, '\n'); | |
313 | } | ||
314 | 1 | return s; | |
315 | } | ||
316 | |||
317 | 18 | static void free_filetype_entry(UserFileTypeEntry *ft) | |
318 | { | ||
319 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 14 times.
|
18 | if (!ft_uses_regex(ft->type)) { |
320 | 4 | free(ft->u.str); | |
321 | } | ||
322 | 18 | free(ft); | |
323 | 18 | } | |
324 | |||
325 | 9 | void free_filetypes(PointerArray *filetypes) | |
326 | { | ||
327 | 9 | ptr_array_free_cb(filetypes, FREE_FUNC(free_filetype_entry)); | |
328 | 9 | } | |
329 | |||
330 | 2501 | bool is_valid_filetype_name_sv(const StringView *name) | |
331 | { | ||
332 | 2501 | const char *data = name->data; | |
333 | 2501 | const size_t len = name->length; | |
334 |
4/4✓ Branch 0 taken 2492 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 2489 times.
✓ Branch 3 taken 3 times.
|
2501 | if (unlikely(len == 0 || len > FILETYPE_NAME_MAX || data[0] == '-')) { |
335 | return false; | ||
336 | } | ||
337 | |||
338 | const AsciiCharType mask = ASCII_SPACE | ASCII_CNTRL; | ||
339 |
2/2✓ Branch 0 taken 12563 times.
✓ Branch 1 taken 2479 times.
|
15042 | for (size_t i = 0; i < len; i++) { |
340 | 12563 | unsigned char ch = data[i]; | |
341 |
4/4✓ Branch 0 taken 12555 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 12553 times.
✓ Branch 3 taken 2 times.
|
12563 | if (unlikely(ascii_test(ch, mask) || ch == '/')) { |
342 | return false; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | return true; | ||
347 | } | ||
348 |