Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <stdbool.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <string.h> | ||
4 | #include "parse.h" | ||
5 | #include "util/ascii.h" | ||
6 | #include "util/debug.h" | ||
7 | #include "util/string.h" | ||
8 | #include "util/strtonum.h" | ||
9 | #include "util/unicode.h" | ||
10 | #include "util/xmalloc.h" | ||
11 | #include "util/xstring.h" | ||
12 | |||
13 | 1102 | static size_t parse_sq(const char *cmd, size_t len, String *buf) | |
14 | { | ||
15 | 1102 | const char *end = memchr(cmd, '\'', len); | |
16 |
2/2✓ Branch 0 (2→3) taken 1101 times.
✓ Branch 1 (2→4) taken 1 times.
|
1102 | size_t pos = end ? (size_t)(end - cmd) : len; |
17 | 1102 | string_append_buf(buf, cmd, pos); | |
18 |
2/2✓ Branch 0 (5→6) taken 1 times.
✓ Branch 1 (5→7) taken 1101 times.
|
1102 | return pos + (end ? 1 : 0); |
19 | } | ||
20 | |||
21 | 6 | static size_t unicode_escape(const char *str, size_t count, String *buf) | |
22 | { | ||
23 | // Note: `u` doesn't need to be initialized here, but `gcc -Og` | ||
24 | // gives a spurious -Wmaybe-uninitialized warning if it's not | ||
25 | 6 | unsigned int u = 0; | |
26 | 6 | static_assert(sizeof(u) >= 4); | |
27 | 6 | size_t n = buf_parse_hex_uint(str, count, &u); | |
28 |
3/4✓ Branch 0 (3→4) taken 4 times.
✓ Branch 1 (3→6) taken 2 times.
✓ Branch 2 (4→5) taken 4 times.
✗ Branch 3 (4→6) not taken.
|
6 | if (likely(n > 0 && u_is_unicode(u))) { |
29 | 4 | string_append_codepoint(buf, u); | |
30 | } | ||
31 | 6 | return n; | |
32 | } | ||
33 | |||
34 | 44 | static size_t hex_escape(const char *str, size_t count, String *buf) | |
35 | { | ||
36 | 44 | unsigned int x = 0; | |
37 | 44 | size_t n = buf_parse_hex_uint(str, count, &x); | |
38 |
2/2✓ Branch 0 (3→4) taken 39 times.
✓ Branch 1 (3→5) taken 5 times.
|
44 | if (likely(n == 2)) { |
39 | 39 | string_append_byte(buf, x); | |
40 | } | ||
41 | 44 | return n; | |
42 | } | ||
43 | |||
44 | 634 | static size_t parse_dq(const char *cmd, size_t len, String *buf) | |
45 | { | ||
46 | 634 | size_t pos = 0; | |
47 |
2/2✓ Branch 0 (24→3) taken 2685 times.
✓ Branch 1 (24→25) taken 2 times.
|
2687 | while (pos < len) { |
48 | 2685 | unsigned char ch = cmd[pos++]; | |
49 |
2/2✓ Branch 0 (3→4) taken 2053 times.
✓ Branch 1 (3→25) taken 632 times.
|
2685 | if (ch == '"') { |
50 | break; | ||
51 | } | ||
52 |
2/2✓ Branch 0 (4→5) taken 652 times.
✓ Branch 1 (4→22) taken 1401 times.
|
2053 | if (ch == '\\' && pos < len) { |
53 | 652 | ch = cmd[pos++]; | |
54 |
13/13✓ Branch 0 (5→6) taken 1 times.
✓ Branch 1 (5→7) taken 1 times.
✓ Branch 2 (5→8) taken 1 times.
✓ Branch 3 (5→9) taken 3 times.
✓ Branch 4 (5→10) taken 366 times.
✓ Branch 5 (5→11) taken 9 times.
✓ Branch 6 (5→12) taken 116 times.
✓ Branch 7 (5→13) taken 1 times.
✓ Branch 8 (5→14) taken 44 times.
✓ Branch 9 (5→16) taken 4 times.
✓ Branch 10 (5→18) taken 2 times.
✓ Branch 11 (5→20) taken 3 times.
✓ Branch 12 (5→22) taken 101 times.
|
652 | switch (ch) { |
55 | 1 | case 'a': ch = '\a'; break; | |
56 | 1 | case 'b': ch = '\b'; break; | |
57 | 1 | case 'e': ch = '\033'; break; | |
58 | 3 | case 'f': ch = '\f'; break; | |
59 | 366 | case 'n': ch = '\n'; break; | |
60 | 9 | case 'r': ch = '\r'; break; | |
61 | 116 | case 't': ch = '\t'; break; | |
62 | 1 | case 'v': ch = '\v'; break; | |
63 | case '\\': | ||
64 | case '"': | ||
65 | break; | ||
66 | 44 | case 'x': | |
67 | 44 | pos += hex_escape(cmd + pos, MIN(2, len - pos), buf); | |
68 | 44 | continue; | |
69 | 4 | case 'u': | |
70 | 4 | pos += unicode_escape(cmd + pos, MIN(4, len - pos), buf); | |
71 | 4 | continue; | |
72 | 2 | case 'U': | |
73 | 2 | pos += unicode_escape(cmd + pos, MIN(8, len - pos), buf); | |
74 | 2 | continue; | |
75 | 3 | default: | |
76 | 3 | string_append_byte(buf, '\\'); | |
77 | 3 | break; | |
78 | } | ||
79 | } | ||
80 | 2003 | string_append_byte(buf, ch); | |
81 | } | ||
82 | |||
83 | 634 | return pos; | |
84 | } | ||
85 | |||
86 | 25 | static size_t parse_var(const CommandRunner *runner, const char *cmd, size_t len, String *buf) | |
87 | { | ||
88 |
3/4✓ Branch 0 (2→3) taken 25 times.
✗ Branch 1 (2→13) not taken.
✓ Branch 2 (3→5) taken 22 times.
✓ Branch 3 (3→13) taken 3 times.
|
25 | if (len == 0 || !is_alpha_or_underscore(cmd[0])) { |
89 | return 0; | ||
90 | } | ||
91 | |||
92 | size_t n = 1; | ||
93 |
4/4✓ Branch 0 (5→6) taken 125 times.
✓ Branch 1 (5→7) taken 11 times.
✓ Branch 2 (6→4) taken 114 times.
✓ Branch 3 (6→7) taken 11 times.
|
136 | while (n < len && is_alnum_or_underscore(cmd[n])) { |
94 | 114 | n++; | |
95 | } | ||
96 | |||
97 |
2/2✓ Branch 0 (7→8) taken 21 times.
✓ Branch 1 (7→13) taken 1 times.
|
22 | if (runner->expand_variable) { |
98 | 21 | char *name = xstrcut(cmd, n); | |
99 | 21 | char *value = runner->expand_variable(runner->e, name); | |
100 | 21 | free(name); | |
101 |
2/2✓ Branch 0 (10→11) taken 12 times.
✓ Branch 1 (10→13) taken 9 times.
|
21 | if (value) { |
102 | 12 | string_append_cstring(buf, value); | |
103 | 12 | free(value); | |
104 | } | ||
105 | } | ||
106 | |||
107 | return n; | ||
108 | } | ||
109 | |||
110 | // Parse a single dterc(5) argument from `cmd`, stopping when an unquoted | ||
111 | // whitespace or semicolon character is found or when all `len` bytes have | ||
112 | // been processed without encountering such a character. Escape sequences | ||
113 | // and $variables are expanded during processing and the fully expanded | ||
114 | // result is returned as a malloc'd string. | ||
115 | 26587 | char *parse_command_arg(const CommandRunner *runner, const char *cmd, size_t len) | |
116 | { | ||
117 | 26587 | const StringView *home = runner->home_dir; | |
118 | 26587 | bool expand_ts = runner->expand_tilde_slash; | |
119 |
4/4✓ Branch 0 (2→3) taken 25766 times.
✓ Branch 1 (2→6) taken 821 times.
✓ Branch 2 (4→5) taken 3 times.
✓ Branch 3 (4→6) taken 25763 times.
|
26587 | bool tilde_slash = expand_ts && len >= 2 && mem_equal(cmd, "~/", 2); |
120 | 26587 | String buf = string_new(len + 1 + (tilde_slash ? home->length : 0)); | |
121 | 26587 | size_t pos = 0; | |
122 | |||
123 |
2/2✓ Branch 0 (7→8) taken 3 times.
✓ Branch 1 (7→21) taken 26584 times.
|
26587 | if (tilde_slash) { |
124 | 3 | string_append_strview(&buf, home); | |
125 | 3 | pos += 1; // Skip past '~' and leave '/' to be handled below | |
126 | } | ||
127 | |||
128 |
2/2✓ Branch 0 (24→9) taken 146776 times.
✓ Branch 1 (24→25) taken 26581 times.
|
173357 | while (pos < len) { |
129 | 146776 | char ch = cmd[pos++]; | |
130 |
6/6✓ Branch 0 (9→10) taken 5 times.
✓ Branch 1 (9→11) taken 1102 times.
✓ Branch 2 (9→13) taken 634 times.
✓ Branch 3 (9→15) taken 25 times.
✓ Branch 4 (9→17) taken 226 times.
✓ Branch 5 (9→20) taken 144784 times.
|
146776 | switch (ch) { |
131 | 5 | case '\t': | |
132 | case '\n': | ||
133 | case '\r': | ||
134 | case ' ': | ||
135 | case ';': | ||
136 | 5 | goto end; | |
137 | 1102 | case '\'': | |
138 | 1102 | pos += parse_sq(cmd + pos, len - pos, &buf); | |
139 | 1102 | break; | |
140 | 634 | case '"': | |
141 | 634 | pos += parse_dq(cmd + pos, len - pos, &buf); | |
142 | 634 | break; | |
143 | 25 | case '$': | |
144 | 25 | pos += parse_var(runner, cmd + pos, len - pos, &buf); | |
145 | 25 | break; | |
146 | 226 | case '\\': | |
147 |
2/2✓ Branch 0 (17→18) taken 1 times.
✓ Branch 1 (17→19) taken 225 times.
|
226 | if (unlikely(pos == len)) { |
148 | 1 | goto end; | |
149 | } | ||
150 | 225 | ch = cmd[pos++]; | |
151 | // Fallthrough | ||
152 | 145009 | default: | |
153 | 145009 | string_append_byte(&buf, ch); | |
154 | 145009 | break; | |
155 | } | ||
156 | } | ||
157 | |||
158 | 26581 | end: | |
159 | 26587 | return string_steal_cstring(&buf); | |
160 | } | ||
161 | |||
162 | 26537 | size_t find_end(const char *cmd, size_t pos, CommandParseError *err) | |
163 | { | ||
164 | 173305 | while (1) { | |
165 |
5/5✓ Branch 0 (5→5) taken 144841 times.
✓ Branch 1 (5→6) taken 1092 times.
✓ Branch 2 (5→11) taken 623 times.
✓ Branch 3 (5→20) taken 226 times.
✓ Branch 4 (5→23) taken 26523 times.
|
173305 | switch (cmd[pos++]) { |
166 | 18892 | case '\'': | |
167 | 36692 | while (1) { | |
168 |
2/2✓ Branch 0 (6→7) taken 1087 times.
✓ Branch 1 (6→8) taken 17805 times.
|
18892 | if (cmd[pos] == '\'') { |
169 | 1087 | pos++; | |
170 | 1087 | break; | |
171 | } | ||
172 |
2/2✓ Branch 0 (8→9) taken 5 times.
✓ Branch 1 (8→10) taken 17800 times.
|
17805 | if (unlikely(cmd[pos] == '\0')) { |
173 | 5 | *err = CMDERR_UNCLOSED_SQUOTE; | |
174 | 5 | return 0; | |
175 | } | ||
176 | 17800 | pos++; | |
177 | } | ||
178 | 1087 | break; | |
179 | 1249 | case '"': | |
180 | 2701 | while (1) { | |
181 |
2/2✓ Branch 0 (12→13) taken 616 times.
✓ Branch 1 (12→14) taken 2085 times.
|
2701 | if (cmd[pos] == '"') { |
182 | 616 | pos++; | |
183 | 616 | break; | |
184 | } | ||
185 |
2/2✓ Branch 0 (14→15) taken 5 times.
✓ Branch 1 (14→16) taken 2080 times.
|
2085 | if (unlikely(cmd[pos] == '\0')) { |
186 | 5 | *err = CMDERR_UNCLOSED_DQUOTE; | |
187 | 5 | return 0; | |
188 | } | ||
189 |
2/2✓ Branch 0 (16→12) taken 1452 times.
✓ Branch 1 (16→17) taken 628 times.
|
2080 | if (cmd[pos++] == '\\') { |
190 |
2/2✓ Branch 0 (17→18) taken 2 times.
✓ Branch 1 (17→19) taken 626 times.
|
628 | if (unlikely(cmd[pos] == '\0')) { |
191 | 2 | *err = CMDERR_UNEXPECTED_EOF; | |
192 | 2 | return 0; | |
193 | } | ||
194 | 626 | pos++; | |
195 | } | ||
196 | } | ||
197 | 616 | break; | |
198 | 226 | case '\\': | |
199 |
2/2✓ Branch 0 (20→21) taken 2 times.
✓ Branch 1 (20→22) taken 224 times.
|
226 | if (unlikely(cmd[pos] == '\0')) { |
200 | 2 | *err = CMDERR_UNEXPECTED_EOF; | |
201 | 2 | return 0; | |
202 | } | ||
203 | 224 | pos++; | |
204 | 224 | break; | |
205 | 26523 | case '\0': | |
206 | case '\t': | ||
207 | case '\n': | ||
208 | case '\r': | ||
209 | case ' ': | ||
210 | case ';': | ||
211 | 26523 | *err = CMDERR_NONE; | |
212 | 26523 | return pos - 1; | |
213 | } | ||
214 | } | ||
215 | |||
216 | BUG("Unexpected break of outer loop"); | ||
217 | } | ||
218 | |||
219 | // Note: `array` must be freed, regardless of the return value | ||
220 | 9541 | CommandParseError parse_commands(const CommandRunner *runner, PointerArray *array, const char *cmd) | |
221 | { | ||
222 | 9541 | for (size_t pos = 0; true; ) { | |
223 |
2/2✓ Branch 0 (5→3) taken 18147 times.
✓ Branch 1 (5→6) taken 35914 times.
|
54061 | while (ascii_isspace(cmd[pos])) { |
224 | 18147 | pos++; | |
225 | } | ||
226 | |||
227 |
2/2✓ Branch 0 (6→7) taken 26387 times.
✓ Branch 1 (6→16) taken 9527 times.
|
35914 | if (cmd[pos] == '\0') { |
228 | break; | ||
229 | } | ||
230 | |||
231 |
2/2✓ Branch 0 (7→8) taken 66 times.
✓ Branch 1 (7→10) taken 26321 times.
|
26387 | if (cmd[pos] == ';') { |
232 | 66 | ptr_array_append(array, NULL); | |
233 | 66 | pos++; | |
234 | 66 | continue; | |
235 | } | ||
236 | |||
237 | 26321 | CommandParseError err; | |
238 | 26321 | size_t end = find_end(cmd, pos, &err); | |
239 |
2/2✓ Branch 0 (11→12) taken 14 times.
✓ Branch 1 (11→13) taken 26307 times.
|
26321 | if (err != CMDERR_NONE) { |
240 | 14 | return err; | |
241 | } | ||
242 | |||
243 | 26307 | ptr_array_append(array, parse_command_arg(runner, cmd + pos, end - pos)); | |
244 | 26307 | pos = end; | |
245 | } | ||
246 | |||
247 | 9527 | ptr_array_append(array, NULL); | |
248 | 9527 | return CMDERR_NONE; | |
249 | } | ||
250 | |||
251 | 6 | const char *command_parse_error_to_string(CommandParseError err) | |
252 | { | ||
253 | 6 | static const char error_strings[][16] = { | |
254 | [CMDERR_UNCLOSED_SQUOTE] = "unclosed '", | ||
255 | [CMDERR_UNCLOSED_DQUOTE] = "unclosed \"", | ||
256 | [CMDERR_UNEXPECTED_EOF] = "unexpected EOF", | ||
257 | }; | ||
258 | |||
259 | 6 | BUG_ON(err <= CMDERR_NONE); | |
260 | 6 | BUG_ON(err >= ARRAYLEN(error_strings)); | |
261 | 6 | return error_strings[err]; | |
262 | } | ||
263 |