Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <string.h> | ||
2 | #include "block-iter.h" | ||
3 | #include "util/ascii.h" | ||
4 | #include "util/debug.h" | ||
5 | #include "util/utf8.h" | ||
6 | #include "util/xmalloc.h" | ||
7 | #include "util/xmemrchr.h" | ||
8 | |||
9 | /* | ||
10 | * Move after next newline (beginning of next line or end of file). | ||
11 | * Returns number of bytes iterator advanced. | ||
12 | */ | ||
13 | 135 | size_t block_iter_eat_line(BlockIter *bi) | |
14 | { | ||
15 | 135 | block_iter_normalize(bi); | |
16 | 135 | const size_t offset = bi->offset; | |
17 |
2/2✓ Branch 0 (3→4) taken 132 times.
✓ Branch 1 (3→10) taken 3 times.
|
135 | if (unlikely(offset == bi->blk->size)) { |
18 | return 0; | ||
19 | } | ||
20 | |||
21 | // There must be at least one newline | ||
22 |
2/2✓ Branch 0 (4→5) taken 7 times.
✓ Branch 1 (4→6) taken 125 times.
|
132 | if (bi->blk->nl == 1) { |
23 | 7 | bi->offset = bi->blk->size; | |
24 | } else { | ||
25 | 125 | const unsigned char *end; | |
26 | 125 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | |
27 | 125 | BUG_ON(!end); | |
28 | 125 | bi->offset = (size_t)(end + 1 - bi->blk->data); | |
29 | } | ||
30 | |||
31 | 132 | return bi->offset - offset; | |
32 | } | ||
33 | |||
34 | /* | ||
35 | * Move to beginning of next line. | ||
36 | * If there is no next line, iterator is not advanced. | ||
37 | * Returns number of bytes iterator advanced. | ||
38 | */ | ||
39 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
40 | { | ||
41 | 79 | block_iter_normalize(bi); | |
42 | 79 | const size_t offset = bi->offset; | |
43 |
1/2✓ Branch 0 (3→4) taken 79 times.
✗ Branch 1 (3→11) not taken.
|
79 | if (unlikely(offset == bi->blk->size)) { |
44 | return 0; | ||
45 | } | ||
46 | |||
47 | // There must be at least one newline | ||
48 | 79 | size_t new_offset; | |
49 |
1/2✓ Branch 0 (4→5) taken 79 times.
✗ Branch 1 (4→8) not taken.
|
79 | if (bi->blk->nl == 1) { |
50 | new_offset = bi->blk->size; | ||
51 | } else { | ||
52 | 79 | const unsigned char *end; | |
53 | 79 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | |
54 | 79 | BUG_ON(!end); | |
55 | 79 | new_offset = (size_t)(end + 1 - bi->blk->data); | |
56 | } | ||
57 |
3/4✓ Branch 0 (8→9) taken 15 times.
✓ Branch 1 (8→10) taken 64 times.
✗ Branch 2 (9→10) not taken.
✓ Branch 3 (9→11) taken 15 times.
|
79 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { |
58 | return 0; | ||
59 | } | ||
60 | |||
61 | 64 | bi->offset = new_offset; | |
62 | 64 | return bi->offset - offset; | |
63 | } | ||
64 | |||
65 | /* | ||
66 | * Move to beginning of previous line. | ||
67 | * Returns number of bytes moved, which is zero if there's no previous line. | ||
68 | */ | ||
69 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
70 | { | ||
71 | 101 | Block *blk = bi->blk; | |
72 | 101 | size_t offset = bi->offset; | |
73 | 101 | size_t start = offset; | |
74 | |||
75 |
4/4✓ Branch 0 (3→4) taken 217 times.
✓ Branch 1 (3→5) taken 10 times.
✓ Branch 2 (4→3) taken 126 times.
✓ Branch 3 (4→5) taken 91 times.
|
227 | while (offset && blk->data[offset - 1] != '\n') { |
76 | offset--; | ||
77 | } | ||
78 | |||
79 |
2/2✓ Branch 0 (5→6) taken 10 times.
✓ Branch 1 (5→8) taken 91 times.
|
101 | if (!offset) { |
80 |
1/2✗ Branch 0 (6→7) not taken.
✓ Branch 1 (6→12) taken 10 times.
|
10 | if (blk->node.prev == bi->head) { |
81 | return 0; | ||
82 | } | ||
83 | ✗ | bi->blk = blk = BLOCK(blk->node.prev); | |
84 | ✗ | offset = blk->size; | |
85 | ✗ | start += offset; | |
86 | } | ||
87 | |||
88 | 91 | offset--; | |
89 |
4/4✓ Branch 0 (9→10) taken 922 times.
✓ Branch 1 (9→11) taken 17 times.
✓ Branch 2 (10→9) taken 848 times.
✓ Branch 3 (10→11) taken 74 times.
|
939 | while (offset && blk->data[offset - 1] != '\n') { |
90 | offset--; | ||
91 | } | ||
92 | 91 | bi->offset = offset; | |
93 | 91 | return start - offset; | |
94 | } | ||
95 | |||
96 | 123 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
97 | { | ||
98 | 123 | BlockIter tmp = *bi; | |
99 | 123 | return block_iter_next_char(&tmp, up); | |
100 | } | ||
101 | |||
102 | 291 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
103 | { | ||
104 | 291 | size_t offset = bi->offset; | |
105 |
2/2✓ Branch 0 (2→3) taken 17 times.
✓ Branch 1 (2→5) taken 274 times.
|
291 | if (unlikely(offset == bi->blk->size)) { |
106 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 17 times.
|
17 | if (unlikely(bi->blk->node.next == bi->head)) { |
107 | return 0; | ||
108 | } | ||
109 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
110 | ✗ | bi->offset = offset = 0; | |
111 | } | ||
112 | |||
113 | // Note: this block can't be empty | ||
114 | 274 | unsigned char byte = bi->blk->data[offset]; | |
115 |
2/2✓ Branch 0 (5→6) taken 272 times.
✓ Branch 1 (5→7) taken 2 times.
|
274 | if (likely(byte < 0x80)) { |
116 | 272 | *up = byte; | |
117 | 272 | bi->offset++; | |
118 | 272 | return 1; | |
119 | } | ||
120 | |||
121 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
122 | 2 | return bi->offset - offset; | |
123 | } | ||
124 | |||
125 | 94 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
126 | { | ||
127 | 94 | size_t offset = bi->offset; | |
128 |
2/2✓ Branch 0 (2→3) taken 6 times.
✓ Branch 1 (2→5) taken 88 times.
|
94 | if (unlikely(offset == 0)) { |
129 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 6 times.
|
6 | if (unlikely(bi->blk->node.prev == bi->head)) { |
130 | return 0; | ||
131 | } | ||
132 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
133 | ✗ | bi->offset = offset = bi->blk->size; | |
134 | } | ||
135 | |||
136 | // Note: this block can't be empty | ||
137 | 88 | unsigned char byte = bi->blk->data[offset - 1]; | |
138 |
1/2✓ Branch 0 (5→6) taken 88 times.
✗ Branch 1 (5→7) not taken.
|
88 | if (likely(byte < 0x80)) { |
139 | 88 | *up = byte; | |
140 | 88 | bi->offset--; | |
141 | 88 | return 1; | |
142 | } | ||
143 | |||
144 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
145 | ✗ | return offset - bi->offset; | |
146 | } | ||
147 | |||
148 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
149 | { | ||
150 | 45 | CodePoint u; | |
151 | 45 | size_t size = block_iter_next_char(bi, &u); | |
152 |
3/4✓ Branch 0 (7→8) taken 38 times.
✓ Branch 1 (7→10) taken 7 times.
✗ Branch 2 (9→4) not taken.
✓ Branch 3 (9→10) taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
153 | ✗ | size += block_iter_next_char(bi, &u); | |
154 | } | ||
155 | 45 | return size; | |
156 | } | ||
157 | |||
158 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
159 | { | ||
160 | 16 | CodePoint u; | |
161 | 16 | size_t skip, total = 0; | |
162 | 16 | do { | |
163 | 16 | skip = block_iter_prev_char(bi, &u); | |
164 | 16 | total += skip; | |
165 |
3/4✓ Branch 0 (4→5) taken 13 times.
✓ Branch 1 (4→8) taken 3 times.
✗ Branch 2 (6→7) not taken.
✓ Branch 3 (6→8) taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
166 | 16 | return total; | |
167 | } | ||
168 | |||
169 | 233 | size_t block_iter_bol(BlockIter *bi) | |
170 | { | ||
171 | 233 | block_iter_normalize(bi); | |
172 | 233 | size_t offset = bi->offset; | |
173 |
2/2✓ Branch 0 (3→4) taken 98 times.
✓ Branch 1 (3→13) taken 135 times.
|
233 | if (block_iter_is_bol(bi)) { |
174 | return 0; | ||
175 | } | ||
176 | |||
177 | // These cases are handled by the condition above | ||
178 | 98 | const Block *blk = bi->blk; | |
179 | 98 | BUG_ON(offset == 0); | |
180 | 98 | BUG_ON(offset >= blk->size); | |
181 | |||
182 |
2/2✓ Branch 0 (8→9) taken 30 times.
✓ Branch 1 (8→10) taken 68 times.
|
98 | if (blk->nl == 1) { |
183 | 30 | bi->offset = 0; // Only 1 line in `blk`; bol is at offset 0 | |
184 | 30 | return offset; | |
185 | } | ||
186 | |||
187 | 68 | const unsigned char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
188 |
2/2✓ Branch 0 (10→11) taken 26 times.
✓ Branch 1 (10→12) taken 42 times.
|
68 | if (!nl) { |
189 | 26 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
190 | 26 | return offset; | |
191 | } | ||
192 | |||
193 | 42 | offset = (size_t)(nl - blk->data) + 1; | |
194 | 42 | size_t count = bi->offset - offset; | |
195 | 42 | bi->offset = offset; | |
196 | 42 | return count; | |
197 | } | ||
198 | |||
199 | 28 | size_t block_iter_eol(BlockIter *bi) | |
200 | { | ||
201 | 28 | block_iter_normalize(bi); | |
202 | 28 | const Block *blk = bi->blk; | |
203 | 28 | const size_t offset = bi->offset; | |
204 | |||
205 |
2/2✓ Branch 0 (3→4) taken 25 times.
✓ Branch 1 (3→9) taken 3 times.
|
28 | if (unlikely(offset == blk->size)) { |
206 | // Cursor at end of last block | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 |
2/2✓ Branch 0 (4→5) taken 4 times.
✓ Branch 1 (4→6) taken 21 times.
|
25 | if (blk->nl == 1) { |
211 | 4 | bi->offset = blk->size - 1; | |
212 | 4 | return bi->offset - offset; | |
213 | } | ||
214 | |||
215 | 21 | const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset); | |
216 | 21 | BUG_ON(!end); | |
217 | 21 | bi->offset = (size_t)(end - blk->data); | |
218 | 21 | return bi->offset - offset; | |
219 | } | ||
220 | |||
221 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
222 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
223 | { | ||
224 | 30 | block_iter_normalize(bi); | |
225 | 30 | const char *data = bi->blk->data; | |
226 | 30 | size_t count = 0; | |
227 | 30 | size_t i = bi->offset; | |
228 | |||
229 | // We're only operating on one line and checking for ASCII characters, | ||
230 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
231 |
1/2✓ Branch 0 (6→4) taken 52 times.
✗ Branch 1 (6→7) not taken.
|
52 | for (size_t n = bi->blk->size; i < n; count++) { |
232 | 52 | unsigned char c = data[i++]; | |
233 |
2/2✓ Branch 0 (4→5) taken 22 times.
✓ Branch 1 (4→7) taken 30 times.
|
52 | if (!ascii_isblank(c)) { |
234 | break; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | 30 | bi->offset = i; | |
239 | 30 | return count; | |
240 | } | ||
241 | |||
242 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
243 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
244 | |||
245 | { | ||
246 | 30 | block_iter_normalize(bi); | |
247 | 30 | size_t count = 0; | |
248 | 30 | size_t i = bi->offset; | |
249 | |||
250 |
1/2✓ Branch 0 (6→4) taken 61 times.
✗ Branch 1 (6→7) not taken.
|
61 | for (const char *data = bi->blk->data; i > 0; count++) { |
251 | 61 | unsigned char c = data[--i]; | |
252 |
2/2✓ Branch 0 (4→5) taken 31 times.
✓ Branch 1 (4→7) taken 30 times.
|
61 | if (!ascii_isblank(c)) { |
253 | i++; | ||
254 | break; | ||
255 | } | ||
256 | } | ||
257 | |||
258 | 30 | bi->offset = i; | |
259 | 30 | return count; | |
260 | } | ||
261 | |||
262 | // Non-empty line can be used to determine size of indentation for the next line | ||
263 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
264 | { | ||
265 | 14 | block_iter_bol(bi); | |
266 | 18 | do { | |
267 | 18 | StringView line = block_iter_get_line(bi); | |
268 |
2/2✓ Branch 0 (4→5) taken 14 times.
✓ Branch 1 (4→6) taken 4 times.
|
18 | if (!strview_isblank(&line)) { |
269 | 14 | return true; | |
270 | } | ||
271 |
1/2✓ Branch 0 (7→3) taken 4 times.
✗ Branch 1 (7→8) not taken.
|
4 | } while (block_iter_prev_line(bi)); |
272 | return false; | ||
273 | } | ||
274 | |||
275 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
276 | { | ||
277 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 1 times.
|
1 | while (count > bi->offset) { |
278 | ✗ | count -= bi->offset; | |
279 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
280 | ✗ | bi->offset = bi->blk->size; | |
281 | } | ||
282 | 1 | bi->offset -= count; | |
283 | 1 | } | |
284 | |||
285 | 291 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
286 | { | ||
287 | 291 | size_t avail = bi->blk->size - bi->offset; | |
288 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 291 times.
|
291 | while (count > avail) { |
289 | ✗ | count -= avail; | |
290 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
291 | ✗ | bi->offset = 0; | |
292 | ✗ | avail = bi->blk->size; | |
293 | } | ||
294 | 291 | bi->offset += count; | |
295 | 291 | } | |
296 | |||
297 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
298 | { | ||
299 | 63 | Block *blk; | |
300 |
1/2✓ Branch 0 (6→3) taken 63 times.
✗ Branch 1 (6→7) not taken.
|
63 | block_for_each(blk, bi->head) { |
301 |
1/2✓ Branch 0 (3→4) taken 63 times.
✗ Branch 1 (3→5) not taken.
|
63 | if (offset <= blk->size) { |
302 | 63 | bi->blk = blk; | |
303 | 63 | bi->offset = offset; | |
304 | 63 | return; | |
305 | } | ||
306 | ✗ | offset -= blk->size; | |
307 | } | ||
308 | } | ||
309 | |||
310 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
311 | { | ||
312 | 4 | Block *blk = BLOCK(bi->head->next); | |
313 | 4 | size_t nl = 0; | |
314 |
1/4✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→6) taken 4 times.
✗ Branch 2 (5→3) not taken.
✗ Branch 3 (5→6) not taken.
|
4 | while (blk->node.next != bi->head && nl + blk->nl < line) { |
315 | ✗ | nl += blk->nl; | |
316 | ✗ | blk = BLOCK(blk->node.next); | |
317 | } | ||
318 | |||
319 | 4 | bi->blk = blk; | |
320 | 4 | bi->offset = 0; | |
321 |
2/2✓ Branch 0 (10→7) taken 8 times.
✓ Branch 1 (10→11) taken 3 times.
|
11 | while (nl < line) { |
322 |
2/2✓ Branch 0 (8→9) taken 7 times.
✓ Branch 1 (8→11) taken 1 times.
|
8 | if (!block_iter_eat_line(bi)) { |
323 | break; | ||
324 | } | ||
325 | 7 | nl++; | |
326 | } | ||
327 | 4 | } | |
328 | |||
329 | 327 | size_t block_iter_get_offset(const BlockIter *bi) | |
330 | { | ||
331 | 327 | const Block *blk; | |
332 | 327 | size_t offset = 0; | |
333 |
1/2✓ Branch 0 (5→3) taken 327 times.
✗ Branch 1 (5→6) not taken.
|
327 | block_for_each(blk, bi->head) { |
334 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→6) taken 327 times.
|
327 | if (blk == bi->blk) { |
335 | break; | ||
336 | } | ||
337 | ✗ | offset += blk->size; | |
338 | } | ||
339 | 327 | return offset + bi->offset; | |
340 | } | ||
341 | |||
342 | 13 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | |
343 | { | ||
344 |
1/2✓ Branch 0 (2→3) taken 13 times.
✗ Branch 1 (2→10) not taken.
|
13 | if (len == 0) { |
345 | return NULL; | ||
346 | } | ||
347 | |||
348 | 13 | const Block *blk = bi->blk; | |
349 | 13 | size_t offset = bi->offset; | |
350 | 13 | size_t pos = 0; | |
351 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
352 | |||
353 |
2/2✓ Branch 0 (9→5) taken 13 times.
✓ Branch 1 (9→10) taken 13 times.
|
26 | while (pos < len) { |
354 | 13 | const size_t avail = blk->size - offset; | |
355 | 13 | size_t count = MIN(len - pos, avail); | |
356 | 13 | memcpy(buf + pos, blk->data + offset, count); | |
357 | 13 | pos += count; | |
358 | 13 | BUG_ON(pos < len && blk->node.next == bi->head); | |
359 | 13 | blk = BLOCK(blk->node.next); | |
360 | 13 | offset = 0; | |
361 | } | ||
362 | |||
363 | return buf; | ||
364 | } | ||
365 | |||
366 | // Return the contents of the line that extends from `bi`. Callers | ||
367 | // should ensure `bi` is already at BOL, if whole lines are needed. | ||
368 | 381 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
369 | { | ||
370 | 381 | block_iter_normalize(bi); | |
371 | 381 | StringView line = {.data = bi->blk->data + bi->offset}; | |
372 | 381 | const size_t max = bi->blk->size - bi->offset; | |
373 |
2/2✓ Branch 0 (3→4) taken 10 times.
✓ Branch 1 (3→5) taken 371 times.
|
381 | if (unlikely(max == 0)) { |
374 | // Cursor at end of last block | ||
375 | 10 | return line; | |
376 | } | ||
377 | |||
378 |
2/2✓ Branch 0 (5→6) taken 48 times.
✓ Branch 1 (5→9) taken 323 times.
|
371 | if (bi->blk->nl == 1) { |
379 | 48 | BUG_ON(line.data[max - 1] != '\n'); | |
380 | 48 | line.length = max; | |
381 | 48 | return line; | |
382 | } | ||
383 | |||
384 | 323 | const unsigned char *nl = memchr(line.data, '\n', max); | |
385 | 323 | BUG_ON(!nl); | |
386 | 323 | line.length = (size_t)(nl - line.data + 1); | |
387 | 323 | BUG_ON(line.length == 0); | |
388 | 323 | return line; | |
389 | } | ||
390 |