Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <string.h> | ||
2 | #include "block-iter.h" | ||
3 | #include "util/ascii.h" | ||
4 | #include "util/debug.h" | ||
5 | #include "util/utf8.h" | ||
6 | #include "util/xmalloc.h" | ||
7 | #include "util/xmemrchr.h" | ||
8 | |||
9 | /* | ||
10 | * Move after next newline (beginning of next line or end of file). | ||
11 | * Returns number of bytes iterator advanced. | ||
12 | */ | ||
13 | 135 | size_t block_iter_eat_line(BlockIter *bi) | |
14 | { | ||
15 | 135 | block_iter_normalize(bi); | |
16 | 135 | const size_t offset = bi->offset; | |
17 |
2/2✓ Branch 0 (3→4) taken 132 times.
✓ Branch 1 (3→10) taken 3 times.
|
135 | if (unlikely(offset == bi->blk->size)) { |
18 | return 0; | ||
19 | } | ||
20 | |||
21 | // There must be at least one newline | ||
22 |
2/2✓ Branch 0 (4→5) taken 7 times.
✓ Branch 1 (4→6) taken 125 times.
|
132 | if (bi->blk->nl == 1) { |
23 | 7 | bi->offset = bi->blk->size; | |
24 | } else { | ||
25 | 125 | const unsigned char *end; | |
26 | 125 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | |
27 | 125 | BUG_ON(!end); | |
28 | 125 | bi->offset = (size_t)(end + 1 - bi->blk->data); | |
29 | } | ||
30 | |||
31 | 132 | return bi->offset - offset; | |
32 | } | ||
33 | |||
34 | /* | ||
35 | * Move to beginning of next line. | ||
36 | * If there is no next line, iterator is not advanced. | ||
37 | * Returns number of bytes iterator advanced. | ||
38 | */ | ||
39 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
40 | { | ||
41 | 79 | block_iter_normalize(bi); | |
42 | 79 | const size_t offset = bi->offset; | |
43 |
1/2✓ Branch 0 (3→4) taken 79 times.
✗ Branch 1 (3→11) not taken.
|
79 | if (unlikely(offset == bi->blk->size)) { |
44 | return 0; | ||
45 | } | ||
46 | |||
47 | // There must be at least one newline | ||
48 | 79 | size_t new_offset; | |
49 |
1/2✓ Branch 0 (4→5) taken 79 times.
✗ Branch 1 (4→8) not taken.
|
79 | if (bi->blk->nl == 1) { |
50 | new_offset = bi->blk->size; | ||
51 | } else { | ||
52 | 79 | const unsigned char *end; | |
53 | 79 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | |
54 | 79 | BUG_ON(!end); | |
55 | 79 | new_offset = (size_t)(end + 1 - bi->blk->data); | |
56 | } | ||
57 |
3/4✓ Branch 0 (8→9) taken 15 times.
✓ Branch 1 (8→10) taken 64 times.
✗ Branch 2 (9→10) not taken.
✓ Branch 3 (9→11) taken 15 times.
|
79 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { |
58 | return 0; | ||
59 | } | ||
60 | |||
61 | 64 | bi->offset = new_offset; | |
62 | 64 | return bi->offset - offset; | |
63 | } | ||
64 | |||
65 | /* | ||
66 | * Move to beginning of previous line. | ||
67 | * Returns number of bytes moved, which is zero if there's no previous line. | ||
68 | */ | ||
69 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
70 | { | ||
71 | 101 | Block *blk = bi->blk; | |
72 | 101 | size_t offset = bi->offset; | |
73 | 101 | size_t start = offset; | |
74 | |||
75 |
4/4✓ Branch 0 (3→4) taken 217 times.
✓ Branch 1 (3→5) taken 10 times.
✓ Branch 2 (4→3) taken 126 times.
✓ Branch 3 (4→5) taken 91 times.
|
227 | while (offset && blk->data[offset - 1] != '\n') { |
76 | offset--; | ||
77 | } | ||
78 | |||
79 |
2/2✓ Branch 0 (5→6) taken 10 times.
✓ Branch 1 (5→8) taken 91 times.
|
101 | if (!offset) { |
80 |
1/2✗ Branch 0 (6→7) not taken.
✓ Branch 1 (6→12) taken 10 times.
|
10 | if (blk->node.prev == bi->head) { |
81 | return 0; | ||
82 | } | ||
83 | ✗ | bi->blk = blk = BLOCK(blk->node.prev); | |
84 | ✗ | offset = blk->size; | |
85 | ✗ | start += offset; | |
86 | } | ||
87 | |||
88 | 91 | offset--; | |
89 |
4/4✓ Branch 0 (9→10) taken 922 times.
✓ Branch 1 (9→11) taken 17 times.
✓ Branch 2 (10→9) taken 848 times.
✓ Branch 3 (10→11) taken 74 times.
|
939 | while (offset && blk->data[offset - 1] != '\n') { |
90 | offset--; | ||
91 | } | ||
92 | 91 | bi->offset = offset; | |
93 | 91 | return start - offset; | |
94 | } | ||
95 | |||
96 | 123 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
97 | { | ||
98 | 123 | BlockIter tmp = *bi; | |
99 | 123 | return block_iter_next_char(&tmp, up); | |
100 | } | ||
101 | |||
102 | 291 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
103 | { | ||
104 | 291 | size_t offset = bi->offset; | |
105 |
2/2✓ Branch 0 (2→3) taken 17 times.
✓ Branch 1 (2→5) taken 274 times.
|
291 | if (unlikely(offset == bi->blk->size)) { |
106 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 17 times.
|
17 | if (unlikely(bi->blk->node.next == bi->head)) { |
107 | return 0; | ||
108 | } | ||
109 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
110 | ✗ | bi->offset = offset = 0; | |
111 | } | ||
112 | |||
113 | // Note: this block can't be empty | ||
114 | 274 | *up = bi->blk->data[offset]; | |
115 |
2/2✓ Branch 0 (5→6) taken 272 times.
✓ Branch 1 (5→7) taken 2 times.
|
274 | if (likely(*up < 0x80)) { |
116 | 272 | bi->offset++; | |
117 | 272 | return 1; | |
118 | } | ||
119 | |||
120 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
121 | 2 | return bi->offset - offset; | |
122 | } | ||
123 | |||
124 | 94 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
125 | { | ||
126 | 94 | size_t offset = bi->offset; | |
127 |
2/2✓ Branch 0 (2→3) taken 6 times.
✓ Branch 1 (2→5) taken 88 times.
|
94 | if (unlikely(offset == 0)) { |
128 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→9) taken 6 times.
|
6 | if (unlikely(bi->blk->node.prev == bi->head)) { |
129 | return 0; | ||
130 | } | ||
131 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
132 | ✗ | bi->offset = offset = bi->blk->size; | |
133 | } | ||
134 | |||
135 | // Note: this block can't be empty | ||
136 | 88 | *up = bi->blk->data[offset - 1]; | |
137 |
1/2✓ Branch 0 (5→6) taken 88 times.
✗ Branch 1 (5→7) not taken.
|
88 | if (likely(*up < 0x80)) { |
138 | 88 | bi->offset--; | |
139 | 88 | return 1; | |
140 | } | ||
141 | |||
142 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
143 | ✗ | return offset - bi->offset; | |
144 | } | ||
145 | |||
146 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
147 | { | ||
148 | 45 | CodePoint u; | |
149 | 45 | size_t size = block_iter_next_char(bi, &u); | |
150 |
3/4✓ Branch 0 (7→8) taken 38 times.
✓ Branch 1 (7→10) taken 7 times.
✗ Branch 2 (9→4) not taken.
✓ Branch 3 (9→10) taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
151 | ✗ | size += block_iter_next_char(bi, &u); | |
152 | } | ||
153 | 45 | return size; | |
154 | } | ||
155 | |||
156 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
157 | { | ||
158 | 16 | CodePoint u; | |
159 | 16 | size_t skip, total = 0; | |
160 | 16 | do { | |
161 | 16 | skip = block_iter_prev_char(bi, &u); | |
162 | 16 | total += skip; | |
163 |
3/4✓ Branch 0 (4→5) taken 13 times.
✓ Branch 1 (4→8) taken 3 times.
✗ Branch 2 (6→7) not taken.
✓ Branch 3 (6→8) taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
164 | 16 | return total; | |
165 | } | ||
166 | |||
167 | 292 | size_t block_iter_bol(BlockIter *bi) | |
168 | { | ||
169 | 292 | block_iter_normalize(bi); | |
170 | 292 | size_t offset = bi->offset; | |
171 |
2/2✓ Branch 0 (3→4) taken 98 times.
✓ Branch 1 (3→13) taken 194 times.
|
292 | if (block_iter_is_bol(bi)) { |
172 | return 0; | ||
173 | } | ||
174 | |||
175 | // These cases are handled by the condition above | ||
176 | 98 | const Block *blk = bi->blk; | |
177 | 98 | BUG_ON(offset == 0); | |
178 | 98 | BUG_ON(offset >= blk->size); | |
179 | |||
180 |
2/2✓ Branch 0 (8→9) taken 30 times.
✓ Branch 1 (8→10) taken 68 times.
|
98 | if (blk->nl == 1) { |
181 | 30 | bi->offset = 0; // Only 1 line in `blk`; bol is at offset 0 | |
182 | 30 | return offset; | |
183 | } | ||
184 | |||
185 | 68 | const unsigned char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
186 |
2/2✓ Branch 0 (10→11) taken 26 times.
✓ Branch 1 (10→12) taken 42 times.
|
68 | if (!nl) { |
187 | 26 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
188 | 26 | return offset; | |
189 | } | ||
190 | |||
191 | 42 | offset = (size_t)(nl - blk->data) + 1; | |
192 | 42 | size_t count = bi->offset - offset; | |
193 | 42 | bi->offset = offset; | |
194 | 42 | return count; | |
195 | } | ||
196 | |||
197 | 28 | size_t block_iter_eol(BlockIter *bi) | |
198 | { | ||
199 | 28 | block_iter_normalize(bi); | |
200 | 28 | const Block *blk = bi->blk; | |
201 | 28 | const size_t offset = bi->offset; | |
202 | |||
203 |
2/2✓ Branch 0 (3→4) taken 25 times.
✓ Branch 1 (3→9) taken 3 times.
|
28 | if (unlikely(offset == blk->size)) { |
204 | // Cursor at end of last block | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 |
2/2✓ Branch 0 (4→5) taken 4 times.
✓ Branch 1 (4→6) taken 21 times.
|
25 | if (blk->nl == 1) { |
209 | 4 | bi->offset = blk->size - 1; | |
210 | 4 | return bi->offset - offset; | |
211 | } | ||
212 | |||
213 | 21 | const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset); | |
214 | 21 | BUG_ON(!end); | |
215 | 21 | bi->offset = (size_t)(end - blk->data); | |
216 | 21 | return bi->offset - offset; | |
217 | } | ||
218 | |||
219 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
220 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
221 | { | ||
222 | 30 | block_iter_normalize(bi); | |
223 | 30 | const char *data = bi->blk->data; | |
224 | 30 | size_t count = 0; | |
225 | 30 | size_t i = bi->offset; | |
226 | |||
227 | // We're only operating on one line and checking for ASCII characters, | ||
228 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
229 |
1/2✓ Branch 0 (6→4) taken 52 times.
✗ Branch 1 (6→7) not taken.
|
52 | for (size_t n = bi->blk->size; i < n; count++) { |
230 | 52 | unsigned char c = data[i++]; | |
231 |
2/2✓ Branch 0 (4→5) taken 22 times.
✓ Branch 1 (4→7) taken 30 times.
|
52 | if (!ascii_isblank(c)) { |
232 | break; | ||
233 | } | ||
234 | } | ||
235 | |||
236 | 30 | bi->offset = i; | |
237 | 30 | return count; | |
238 | } | ||
239 | |||
240 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
241 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
242 | |||
243 | { | ||
244 | 30 | block_iter_normalize(bi); | |
245 | 30 | size_t count = 0; | |
246 | 30 | size_t i = bi->offset; | |
247 | |||
248 |
1/2✓ Branch 0 (6→4) taken 61 times.
✗ Branch 1 (6→7) not taken.
|
61 | for (const char *data = bi->blk->data; i > 0; count++) { |
249 | 61 | unsigned char c = data[--i]; | |
250 |
2/2✓ Branch 0 (4→5) taken 31 times.
✓ Branch 1 (4→7) taken 30 times.
|
61 | if (!ascii_isblank(c)) { |
251 | i++; | ||
252 | break; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | 30 | bi->offset = i; | |
257 | 30 | return count; | |
258 | } | ||
259 | |||
260 | // Non-empty line can be used to determine size of indentation for the next line | ||
261 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
262 | { | ||
263 | 14 | block_iter_bol(bi); | |
264 | 18 | do { | |
265 | 18 | StringView line = block_iter_get_line(bi); | |
266 |
2/2✓ Branch 0 (4→5) taken 14 times.
✓ Branch 1 (4→6) taken 4 times.
|
18 | if (!strview_isblank(&line)) { |
267 | 14 | return true; | |
268 | } | ||
269 |
1/2✓ Branch 0 (7→3) taken 4 times.
✗ Branch 1 (7→8) not taken.
|
4 | } while (block_iter_prev_line(bi)); |
270 | return false; | ||
271 | } | ||
272 | |||
273 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
274 | { | ||
275 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 1 times.
|
1 | while (count > bi->offset) { |
276 | ✗ | count -= bi->offset; | |
277 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
278 | ✗ | bi->offset = bi->blk->size; | |
279 | } | ||
280 | 1 | bi->offset -= count; | |
281 | 1 | } | |
282 | |||
283 | 291 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
284 | { | ||
285 | 291 | size_t avail = bi->blk->size - bi->offset; | |
286 |
1/2✗ Branch 0 (4→3) not taken.
✓ Branch 1 (4→5) taken 291 times.
|
291 | while (count > avail) { |
287 | ✗ | count -= avail; | |
288 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
289 | ✗ | bi->offset = 0; | |
290 | ✗ | avail = bi->blk->size; | |
291 | } | ||
292 | 291 | bi->offset += count; | |
293 | 291 | } | |
294 | |||
295 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
296 | { | ||
297 | 63 | Block *blk; | |
298 |
1/2✓ Branch 0 (6→3) taken 63 times.
✗ Branch 1 (6→7) not taken.
|
63 | block_for_each(blk, bi->head) { |
299 |
1/2✓ Branch 0 (3→4) taken 63 times.
✗ Branch 1 (3→5) not taken.
|
63 | if (offset <= blk->size) { |
300 | 63 | bi->blk = blk; | |
301 | 63 | bi->offset = offset; | |
302 | 63 | return; | |
303 | } | ||
304 | ✗ | offset -= blk->size; | |
305 | } | ||
306 | } | ||
307 | |||
308 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
309 | { | ||
310 | 4 | Block *blk = BLOCK(bi->head->next); | |
311 | 4 | size_t nl = 0; | |
312 |
1/4✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→6) taken 4 times.
✗ Branch 2 (5→3) not taken.
✗ Branch 3 (5→6) not taken.
|
4 | while (blk->node.next != bi->head && nl + blk->nl < line) { |
313 | ✗ | nl += blk->nl; | |
314 | ✗ | blk = BLOCK(blk->node.next); | |
315 | } | ||
316 | |||
317 | 4 | bi->blk = blk; | |
318 | 4 | bi->offset = 0; | |
319 |
2/2✓ Branch 0 (10→7) taken 8 times.
✓ Branch 1 (10→11) taken 3 times.
|
11 | while (nl < line) { |
320 |
2/2✓ Branch 0 (8→9) taken 7 times.
✓ Branch 1 (8→11) taken 1 times.
|
8 | if (!block_iter_eat_line(bi)) { |
321 | break; | ||
322 | } | ||
323 | 7 | nl++; | |
324 | } | ||
325 | 4 | } | |
326 | |||
327 | 327 | size_t block_iter_get_offset(const BlockIter *bi) | |
328 | { | ||
329 | 327 | const Block *blk; | |
330 | 327 | size_t offset = 0; | |
331 |
1/2✓ Branch 0 (5→3) taken 327 times.
✗ Branch 1 (5→6) not taken.
|
327 | block_for_each(blk, bi->head) { |
332 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→6) taken 327 times.
|
327 | if (blk == bi->blk) { |
333 | break; | ||
334 | } | ||
335 | ✗ | offset += blk->size; | |
336 | } | ||
337 | 327 | return offset + bi->offset; | |
338 | } | ||
339 | |||
340 | 13 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | |
341 | { | ||
342 |
1/2✓ Branch 0 (2→3) taken 13 times.
✗ Branch 1 (2→10) not taken.
|
13 | if (len == 0) { |
343 | return NULL; | ||
344 | } | ||
345 | |||
346 | 13 | const Block *blk = bi->blk; | |
347 | 13 | size_t offset = bi->offset; | |
348 | 13 | size_t pos = 0; | |
349 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
350 | |||
351 |
2/2✓ Branch 0 (9→5) taken 13 times.
✓ Branch 1 (9→10) taken 13 times.
|
26 | while (pos < len) { |
352 | 13 | const size_t avail = blk->size - offset; | |
353 | 13 | size_t count = MIN(len - pos, avail); | |
354 | 13 | memcpy(buf + pos, blk->data + offset, count); | |
355 | 13 | pos += count; | |
356 | 13 | BUG_ON(pos < len && blk->node.next == bi->head); | |
357 | 13 | blk = BLOCK(blk->node.next); | |
358 | 13 | offset = 0; | |
359 | } | ||
360 | |||
361 | return buf; | ||
362 | } | ||
363 | |||
364 | // bi should be at bol | ||
365 | 381 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
366 | { | ||
367 | 381 | block_iter_normalize(bi); | |
368 | 381 | StringView line = {.data = bi->blk->data + bi->offset}; | |
369 | 381 | const size_t max = bi->blk->size - bi->offset; | |
370 |
2/2✓ Branch 0 (3→4) taken 10 times.
✓ Branch 1 (3→5) taken 371 times.
|
381 | if (unlikely(max == 0)) { |
371 | // Cursor at end of last block | ||
372 | 10 | return line; | |
373 | } | ||
374 | |||
375 |
2/2✓ Branch 0 (5→6) taken 48 times.
✓ Branch 1 (5→9) taken 323 times.
|
371 | if (bi->blk->nl == 1) { |
376 | 48 | BUG_ON(line.data[max - 1] != '\n'); | |
377 | 48 | line.length = max; | |
378 | 48 | return line; | |
379 | } | ||
380 | |||
381 | 323 | const unsigned char *nl = memchr(line.data, '\n', max); | |
382 | 323 | BUG_ON(!nl); | |
383 | 323 | line.length = (size_t)(nl - line.data + 1); | |
384 | 323 | BUG_ON(line.length == 0); | |
385 | 323 | return line; | |
386 | } | ||
387 | |||
388 | 374 | StringView block_iter_get_line(BlockIter *bi) | |
389 | { | ||
390 | 374 | StringView line = block_iter_get_line_with_nl(bi); | |
391 | 374 | line.length -= (line.length > 0); // Trim the newline | |
392 | 374 | return line; | |
393 | } | ||
394 | |||
395 | // Set the `line` argument to point to the current line and return | ||
396 | // the offset of the cursor, relative to the start of the line | ||
397 | // (zero means cursor is at bol) | ||
398 | 128 | size_t fetch_this_line(const BlockIter *bi, StringView *line) | |
399 | { | ||
400 | 128 | BlockIter tmp = *bi; | |
401 | 128 | size_t count = block_iter_bol(&tmp); | |
402 | 128 | *line = block_iter_get_line(&tmp); | |
403 | 128 | return count; | |
404 | } | ||
405 |