Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <string.h> | ||
2 | #include "block-iter.h" | ||
3 | #include "util/ascii.h" | ||
4 | #include "util/debug.h" | ||
5 | #include "util/utf8.h" | ||
6 | #include "util/xmalloc.h" | ||
7 | #include "util/xmemrchr.h" | ||
8 | |||
9 | NONNULL_ARGS_AND_RETURN | ||
10 | 548 | static const char *block_memchr_eol(const Block *blk, size_t offset) | |
11 | { | ||
12 | 548 | BUG_ON(offset > blk->size); | |
13 | 548 | const char *eol = memchr(blk->data + offset, '\n', blk->size - offset); | |
14 | 548 | BUG_ON(!eol); | |
15 | 548 | return eol; | |
16 | } | ||
17 | |||
18 | // Move after next newline (beginning of next line or end of file) | ||
19 | // and return number of bytes moved | ||
20 | 135 | size_t block_iter_eat_line(BlockIter *bi) | |
21 | { | ||
22 | 135 | block_iter_normalize(bi); | |
23 | 135 | const size_t offset = bi->offset; | |
24 |
2/2✓ Branch 0 (3→4) taken 132 times.
✓ Branch 1 (3→9) taken 3 times.
|
135 | if (unlikely(offset == bi->blk->size)) { |
25 | return 0; | ||
26 | } | ||
27 | |||
28 | // There must be at least one newline | ||
29 |
2/2✓ Branch 0 (4→5) taken 7 times.
✓ Branch 1 (4→6) taken 125 times.
|
132 | if (bi->blk->nl == 1) { |
30 | 7 | bi->offset = bi->blk->size; | |
31 | } else { | ||
32 | 125 | const char *end = block_memchr_eol(bi->blk, offset); | |
33 | 125 | bi->offset = (size_t)(end + 1 - bi->blk->data); | |
34 | } | ||
35 | |||
36 | 132 | return bi->offset - offset; | |
37 | } | ||
38 | |||
39 | // Move to beginning of next line (if any) and return number of bytes moved | ||
40 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
41 | { | ||
42 | 79 | block_iter_normalize(bi); | |
43 | 79 | const size_t offset = bi->offset; | |
44 |
1/2✓ Branch 0 (3→4) taken 79 times.
✗ Branch 1 (3→10) not taken.
|
79 | if (unlikely(offset == bi->blk->size)) { |
45 | return 0; | ||
46 | } | ||
47 | |||
48 | // There must be at least one newline | ||
49 | 79 | size_t new_offset; | |
50 |
1/2✓ Branch 0 (4→5) taken 79 times.
✗ Branch 1 (4→7) not taken.
|
79 | if (bi->blk->nl == 1) { |
51 | new_offset = bi->blk->size; | ||
52 | } else { | ||
53 | 79 | const char *end = block_memchr_eol(bi->blk, offset); | |
54 | 79 | new_offset = (size_t)(end + 1 - bi->blk->data); | |
55 | } | ||
56 | |||
57 |
3/4✓ Branch 0 (7→8) taken 15 times.
✓ Branch 1 (7→9) taken 64 times.
✗ Branch 2 (8→9) not taken.
✓ Branch 3 (8→10) taken 15 times.
|
79 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { |
58 | return 0; | ||
59 | } | ||
60 | |||
61 | 64 | bi->offset = new_offset; | |
62 | 64 | return bi->offset - offset; | |
63 | } | ||
64 | |||
65 | // Move to beginning of previous line (if any) and return number of bytes moved | ||
66 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
67 | { | ||
68 | 101 | Block *blk = bi->blk; | |
69 | 101 | size_t offset = bi->offset; | |
70 | 101 | size_t start = offset; | |
71 | |||
72 |
4/4✓ Branch 0 (3→4) taken 217 times.
✓ Branch 1 (3→5) taken 10 times.
✓ Branch 2 (4→3) taken 126 times.
✓ Branch 3 (4→5) taken 91 times.
|
227 | while (offset && blk->data[offset - 1] != '\n') { |
73 | offset--; | ||
74 | } | ||
75 | |||
76 |
2/2✓ Branch 0 (5→6) taken 10 times.
✓ Branch 1 (5→8) taken 91 times.
|
101 | if (!offset) { |
77 |
1/2✗ Branch 0 (6→7) not taken.
✓ Branch 1 (6→12) taken 10 times.
|
10 | if (blk->node.prev == bi->head) { |
78 | return 0; | ||
79 | } | ||
80 | ✗ | bi->blk = blk = BLOCK(blk->node.prev); | |
81 | ✗ | offset = blk->size; | |
82 | ✗ | start += offset; | |
83 | } | ||
84 | |||
85 | 91 | offset--; | |
86 |
4/4✓ Branch 0 (9→10) taken 922 times.
✓ Branch 1 (9→11) taken 17 times.
✓ Branch 2 (10→9) taken 848 times.
✓ Branch 3 (10→11) taken 74 times.
|
939 | while (offset && blk->data[offset - 1] != '\n') { |
87 | offset--; | ||
88 | } | ||
89 | 91 | bi->offset = offset; | |
90 | 91 | return start - offset; | |
91 | } | ||
92 | |||
93 | 123 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
94 | { | ||
95 | 123 | BlockIter tmp = *bi; | |
96 | 123 | return block_iter_next_char(&tmp, up); | |
97 | } | ||
98 | |||
99 | 291 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
100 | { | ||
101 | 291 | size_t offset = bi->offset; | |
102 |
2/2✓ Branch 0 (2→3) taken 17 times.
✓ Branch 1 (2→5) taken 274 times.
|
291 | if (unlikely(offset == bi->blk->size)) { |
103 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 17 times.
|
17 | if (unlikely(bi->blk->node.next == bi->head)) { |
104 | return 0; | ||
105 | } | ||
106 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
107 | ✗ | bi->offset = offset = 0; | |
108 | } | ||
109 | |||
110 | // Note: this block can't be empty | ||
111 | 274 | unsigned char byte = bi->blk->data[offset]; | |
112 |
2/2✓ Branch 0 (5→6) taken 272 times.
✓ Branch 1 (5→7) taken 2 times.
|
274 | if (likely(byte < 0x80)) { |
113 | 272 | *up = byte; | |
114 | 272 | bi->offset++; | |
115 | 272 | return 1; | |
116 | } | ||
117 | |||
118 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
119 | 2 | return bi->offset - offset; | |
120 | } | ||
121 | |||
122 | 94 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
123 | { | ||
124 | 94 | size_t offset = bi->offset; | |
125 |
2/2✓ Branch 0 (2→3) taken 6 times.
✓ Branch 1 (2→5) taken 88 times.
|
94 | if (unlikely(offset == 0)) { |
126 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 6 times.
|
6 | if (unlikely(bi->blk->node.prev == bi->head)) { |
127 | return 0; | ||
128 | } | ||
129 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
130 | ✗ | bi->offset = offset = bi->blk->size; | |
131 | } | ||
132 | |||
133 | // Note: this block can't be empty | ||
134 | 88 | unsigned char byte = bi->blk->data[offset - 1]; | |
135 |
1/2✓ Branch 0 (5→6) taken 88 times.
✗ Branch 1 (5→7) not taken.
|
88 | if (likely(byte < 0x80)) { |
136 | 88 | *up = byte; | |
137 | 88 | bi->offset--; | |
138 | 88 | return 1; | |
139 | } | ||
140 | |||
141 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
142 | ✗ | return offset - bi->offset; | |
143 | } | ||
144 | |||
145 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
146 | { | ||
147 | 45 | CodePoint u; | |
148 | 45 | size_t size = block_iter_next_char(bi, &u); | |
149 |
3/4✓ Branch 0 (7→8) taken 38 times.
✓ Branch 1 (7→10) taken 7 times.
✗ Branch 2 (9→4) not taken.
✓ Branch 3 (9→10) taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
150 | ✗ | size += block_iter_next_char(bi, &u); | |
151 | } | ||
152 | 45 | return size; | |
153 | } | ||
154 | |||
155 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
156 | { | ||
157 | 16 | CodePoint u; | |
158 | 16 | size_t skip, total = 0; | |
159 | 16 | do { | |
160 | 16 | skip = block_iter_prev_char(bi, &u); | |
161 | 16 | total += skip; | |
162 |
3/4✓ Branch 0 (4→5) taken 13 times.
✓ Branch 1 (4→8) taken 3 times.
✗ Branch 2 (6→7) not taken.
✓ Branch 3 (6→8) taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
163 | 16 | return total; | |
164 | } | ||
165 | |||
166 | 233 | size_t block_iter_bol(BlockIter *bi) | |
167 | { | ||
168 | 233 | block_iter_normalize(bi); | |
169 | 233 | size_t offset = bi->offset; | |
170 |
2/2✓ Branch 0 (3→4) taken 98 times.
✓ Branch 1 (3→13) taken 135 times.
|
233 | if (block_iter_is_bol(bi)) { |
171 | return 0; | ||
172 | } | ||
173 | |||
174 | // These cases are handled by the condition above | ||
175 | 98 | const Block *blk = bi->blk; | |
176 | 98 | BUG_ON(offset == 0); | |
177 | 98 | BUG_ON(offset >= blk->size); | |
178 | |||
179 |
2/2✓ Branch 0 (8→9) taken 30 times.
✓ Branch 1 (8→10) taken 68 times.
|
98 | if (blk->nl == 1) { |
180 | 30 | bi->offset = 0; // Only 1 line in `blk`; bol is at offset 0 | |
181 | 30 | return offset; | |
182 | } | ||
183 | |||
184 | 68 | const char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
185 |
2/2✓ Branch 0 (10→11) taken 26 times.
✓ Branch 1 (10→12) taken 42 times.
|
68 | if (!nl) { |
186 | 26 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
187 | 26 | return offset; | |
188 | } | ||
189 | |||
190 | 42 | offset = (size_t)(nl - blk->data) + 1; | |
191 | 42 | size_t count = bi->offset - offset; | |
192 | 42 | bi->offset = offset; | |
193 | 42 | return count; | |
194 | } | ||
195 | |||
196 | 28 | size_t block_iter_eol(BlockIter *bi) | |
197 | { | ||
198 | 28 | block_iter_normalize(bi); | |
199 | 28 | const Block *blk = bi->blk; | |
200 | 28 | const size_t offset = bi->offset; | |
201 | |||
202 |
2/2✓ Branch 0 (3→4) taken 25 times.
✓ Branch 1 (3→8) taken 3 times.
|
28 | if (unlikely(offset == blk->size)) { |
203 | // Cursor at end of last block | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 |
2/2✓ Branch 0 (4→5) taken 4 times.
✓ Branch 1 (4→6) taken 21 times.
|
25 | if (blk->nl == 1) { |
208 | 4 | bi->offset = blk->size - 1; | |
209 | 4 | return bi->offset - offset; | |
210 | } | ||
211 | |||
212 | 21 | const char *end = block_memchr_eol(blk, offset); | |
213 | 21 | bi->offset = (size_t)(end - blk->data); | |
214 | 21 | return bi->offset - offset; | |
215 | } | ||
216 | |||
217 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
218 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
219 | { | ||
220 | 30 | block_iter_normalize(bi); | |
221 | 30 | const char *data = bi->blk->data; | |
222 | 30 | size_t count = 0; | |
223 | 30 | size_t i = bi->offset; | |
224 | |||
225 | // We're only operating on one line and checking for ASCII characters, | ||
226 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
227 |
1/2✓ Branch 0 (6→4) taken 52 times.
✗ Branch 1 (6→7) not taken.
|
52 | for (size_t n = bi->blk->size; i < n; count++) { |
228 | 52 | unsigned char c = data[i++]; | |
229 |
2/2✓ Branch 0 (4→5) taken 22 times.
✓ Branch 1 (4→7) taken 30 times.
|
52 | if (!ascii_isblank(c)) { |
230 | break; | ||
231 | } | ||
232 | } | ||
233 | |||
234 | 30 | bi->offset = i; | |
235 | 30 | return count; | |
236 | } | ||
237 | |||
238 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
239 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
240 | |||
241 | { | ||
242 | 30 | block_iter_normalize(bi); | |
243 | 30 | size_t count = 0; | |
244 | 30 | size_t i = bi->offset; | |
245 | |||
246 |
1/2✓ Branch 0 (6→4) taken 61 times.
✗ Branch 1 (6→7) not taken.
|
61 | for (const char *data = bi->blk->data; i > 0; count++) { |
247 | 61 | unsigned char c = data[--i]; | |
248 |
2/2✓ Branch 0 (4→5) taken 31 times.
✓ Branch 1 (4→7) taken 30 times.
|
61 | if (!ascii_isblank(c)) { |
249 | i++; | ||
250 | break; | ||
251 | } | ||
252 | } | ||
253 | |||
254 | 30 | bi->offset = i; | |
255 | 30 | return count; | |
256 | } | ||
257 | |||
258 | // Non-empty line can be used to determine size of indentation for the next line | ||
259 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
260 | { | ||
261 | 14 | block_iter_bol(bi); | |
262 | 18 | do { | |
263 | 18 | StringView line = block_iter_get_line(bi); | |
264 |
2/2✓ Branch 0 (4→5) taken 14 times.
✓ Branch 1 (4→6) taken 4 times.
|
18 | if (!strview_isblank(line)) { |
265 | 14 | return true; | |
266 | } | ||
267 |
1/2✓ Branch 0 (7→3) taken 4 times.
✗ Branch 1 (7→8) not taken.
|
4 | } while (block_iter_prev_line(bi)); |
268 | return false; | ||
269 | } | ||
270 | |||
271 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
272 | { | ||
273 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 1 times.
|
1 | while (count > bi->offset) { |
274 | ✗ | count -= bi->offset; | |
275 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
276 | ✗ | bi->offset = bi->blk->size; | |
277 | } | ||
278 | 1 | bi->offset -= count; | |
279 | 1 | } | |
280 | |||
281 | 291 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
282 | { | ||
283 | 291 | size_t avail = bi->blk->size - bi->offset; | |
284 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 291 times.
|
291 | while (count > avail) { |
285 | ✗ | count -= avail; | |
286 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
287 | ✗ | bi->offset = 0; | |
288 | ✗ | avail = bi->blk->size; | |
289 | } | ||
290 | 291 | bi->offset += count; | |
291 | 291 | } | |
292 | |||
293 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
294 | { | ||
295 | 63 | Block *blk; | |
296 |
1/2✓ Branch 0 (6→3) taken 63 times.
✗ Branch 1 (6→7) not taken.
|
63 | block_for_each(blk, bi->head) { |
297 |
1/2✓ Branch 0 (3→4) taken 63 times.
✗ Branch 1 (3→5) not taken.
|
63 | if (offset <= blk->size) { |
298 | 63 | bi->blk = blk; | |
299 | 63 | bi->offset = offset; | |
300 | 63 | return; | |
301 | } | ||
302 | ✗ | offset -= blk->size; | |
303 | } | ||
304 | } | ||
305 | |||
306 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
307 | { | ||
308 | 4 | Block *blk = BLOCK(bi->head->next); | |
309 | 4 | size_t nl = 0; | |
310 |
1/4✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→6) taken 4 times.
✗ Branch 2 (5→3) not taken.
✗ Branch 3 (5→6) not taken.
|
4 | while (blk->node.next != bi->head && nl + blk->nl < line) { |
311 | ✗ | nl += blk->nl; | |
312 | ✗ | blk = BLOCK(blk->node.next); | |
313 | } | ||
314 | |||
315 | 4 | bi->blk = blk; | |
316 | 4 | bi->offset = 0; | |
317 |
2/2✓ Branch 0 (10→7) taken 8 times.
✓ Branch 1 (10→11) taken 3 times.
|
11 | while (nl < line) { |
318 |
2/2✓ Branch 0 (8→9) taken 7 times.
✓ Branch 1 (8→11) taken 1 times.
|
8 | if (!block_iter_eat_line(bi)) { |
319 | break; | ||
320 | } | ||
321 | 7 | nl++; | |
322 | } | ||
323 | 4 | } | |
324 | |||
325 | 328 | size_t block_iter_get_offset(const BlockIter *bi) | |
326 | { | ||
327 | 328 | const Block *blk; | |
328 | 328 | size_t offset = 0; | |
329 |
1/2✓ Branch 0 (5→3) taken 328 times.
✗ Branch 1 (5→6) not taken.
|
328 | block_for_each(blk, bi->head) { |
330 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→6) taken 328 times.
|
328 | if (blk == bi->blk) { |
331 | break; | ||
332 | } | ||
333 | ✗ | offset += blk->size; | |
334 | } | ||
335 | 328 | return offset + bi->offset; | |
336 | } | ||
337 | |||
338 | 13 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | |
339 | { | ||
340 |
1/2✓ Branch 0 (2→3) taken 13 times.
✗ Branch 1 (2→10) not taken.
|
13 | if (len == 0) { |
341 | return NULL; | ||
342 | } | ||
343 | |||
344 | 13 | const Block *blk = bi->blk; | |
345 | 13 | size_t offset = bi->offset; | |
346 | 13 | size_t pos = 0; | |
347 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
348 | |||
349 |
2/2✓ Branch 0 (9→5) taken 13 times.
✓ Branch 1 (9→10) taken 13 times.
|
26 | while (pos < len) { |
350 | 13 | const size_t avail = blk->size - offset; | |
351 | 13 | size_t count = MIN(len - pos, avail); | |
352 | 13 | memcpy(buf + pos, blk->data + offset, count); | |
353 | 13 | pos += count; | |
354 | 13 | BUG_ON(pos < len && blk->node.next == bi->head); | |
355 | 13 | blk = BLOCK(blk->node.next); | |
356 | 13 | offset = 0; | |
357 | } | ||
358 | |||
359 | return buf; | ||
360 | } | ||
361 | |||
362 | // Return the contents of the line that extends from `bi`. Callers | ||
363 | // should ensure `bi` is already at BOL, if whole lines are needed. | ||
364 | 381 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
365 | { | ||
366 | 381 | block_iter_normalize(bi); | |
367 | 381 | const char *start = bi->blk->data + bi->offset; | |
368 | 381 | StringView line = {.data = start, .length = 0}; | |
369 | 381 | size_t max = bi->blk->size - bi->offset; | |
370 | |||
371 |
2/2✓ Branch 0 (3→4) taken 10 times.
✓ Branch 1 (3→5) taken 371 times.
|
381 | if (unlikely(max == 0)) { |
372 | // Cursor at end of last block | ||
373 | 10 | return line; | |
374 | } | ||
375 | |||
376 |
2/2✓ Branch 0 (5→6) taken 48 times.
✓ Branch 1 (5→9) taken 323 times.
|
371 | if (bi->blk->nl == 1) { |
377 | // Block contains only 1 line; end-of-line is end-of-block | ||
378 | 48 | BUG_ON(line.data[max - 1] != '\n'); | |
379 | 48 | line.length = max; | |
380 | 48 | return line; | |
381 | } | ||
382 | |||
383 | 323 | const char *nl = block_memchr_eol(bi->blk, bi->offset); | |
384 | 323 | line.length = (size_t)(nl - start + 1); | |
385 | 323 | BUG_ON(line.length == 0); | |
386 | 323 | return line; | |
387 | } | ||
388 |