| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <string.h> | ||
| 2 | #include "block-iter.h" | ||
| 3 | #include "util/ascii.h" | ||
| 4 | #include "util/debug.h" | ||
| 5 | #include "util/utf8.h" | ||
| 6 | #include "util/xmalloc.h" | ||
| 7 | #include "util/xmemrchr.h" | ||
| 8 | |||
| 9 | NONNULL_ARGS_AND_RETURN | ||
| 10 | 564 | static const char *block_memchr_eol(const Block *blk, size_t offset) | |
| 11 | { | ||
| 12 | 564 | BUG_ON(offset > blk->size); | |
| 13 | 564 | const char *eol = memchr(blk->data + offset, '\n', blk->size - offset); | |
| 14 | 564 | BUG_ON(!eol); | |
| 15 | 564 | return eol; | |
| 16 | } | ||
| 17 | |||
| 18 | // Move after next newline (beginning of next line or end of file) | ||
| 19 | // and return number of bytes moved | ||
| 20 | 140 | size_t block_iter_eat_line(BlockIter *bi) | |
| 21 | { | ||
| 22 | 140 | block_iter_normalize(bi); | |
| 23 | 140 | const size_t offset = bi->offset; | |
| 24 |
2/2✓ Branch 3 → 4 taken 137 times.
✓ Branch 3 → 9 taken 3 times.
|
140 | if (unlikely(offset == bi->blk->size)) { |
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | // There must be at least one newline | ||
| 29 |
2/2✓ Branch 4 → 5 taken 7 times.
✓ Branch 4 → 6 taken 130 times.
|
137 | if (bi->blk->nl == 1) { |
| 30 | 7 | bi->offset = bi->blk->size; | |
| 31 | } else { | ||
| 32 | 130 | const char *end = block_memchr_eol(bi->blk, offset); | |
| 33 | 130 | bi->offset = (size_t)(end + 1 - bi->blk->data); | |
| 34 | } | ||
| 35 | |||
| 36 | 137 | return bi->offset - offset; | |
| 37 | } | ||
| 38 | |||
| 39 | // Move to beginning of next line (if any) and return number of bytes moved | ||
| 40 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
| 41 | { | ||
| 42 | 79 | block_iter_normalize(bi); | |
| 43 | 79 | const size_t offset = bi->offset; | |
| 44 |
1/2✓ Branch 3 → 4 taken 79 times.
✗ Branch 3 → 10 not taken.
|
79 | if (unlikely(offset == bi->blk->size)) { |
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | // There must be at least one newline | ||
| 49 | 79 | size_t new_offset; | |
| 50 |
1/2✓ Branch 4 → 5 taken 79 times.
✗ Branch 4 → 7 not taken.
|
79 | if (bi->blk->nl == 1) { |
| 51 | new_offset = bi->blk->size; | ||
| 52 | } else { | ||
| 53 | 79 | const char *end = block_memchr_eol(bi->blk, offset); | |
| 54 | 79 | new_offset = (size_t)(end + 1 - bi->blk->data); | |
| 55 | } | ||
| 56 | |||
| 57 |
3/4✓ Branch 7 → 8 taken 15 times.
✓ Branch 7 → 9 taken 64 times.
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 15 times.
|
79 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { |
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | 64 | bi->offset = new_offset; | |
| 62 | 64 | return bi->offset - offset; | |
| 63 | } | ||
| 64 | |||
| 65 | // Move to end of previous line (if any) and return number of bytes moved | ||
| 66 | 101 | size_t block_iter_prev_line_eol(BlockIter *bi) | |
| 67 | { | ||
| 68 | 101 | CodePoint u; | |
| 69 | 101 | BlockIter tmp = *bi; | |
| 70 | 101 | size_t n1 = block_iter_bol(&tmp); | |
| 71 | 101 | size_t n2 = block_iter_prev_char(&tmp, &u); | |
| 72 |
2/2✓ Branch 4 → 5 taken 91 times.
✓ Branch 4 → 10 taken 10 times.
|
101 | if (n2 == 0) { |
| 73 | // block_iter_bol() moved to BOF, which means there's no previous | ||
| 74 | // line and we leave `bi` unchanged | ||
| 75 | return 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | // Otherwise, block_iter_prev_char() moved to the previous line's | ||
| 79 | // EOL, so we set the in-out parameter to reflect the temporary | ||
| 80 | // iterator and and return the sum of the 2 movements | ||
| 81 | 91 | BUG_ON(n2 != 1); | |
| 82 | 91 | BUG_ON(u != '\n'); | |
| 83 | 91 | *bi = tmp; | |
| 84 | 91 | return n1 + n2; | |
| 85 | } | ||
| 86 | |||
| 87 | // Move to beginning of previous line (if any) and return number of bytes moved | ||
| 88 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
| 89 | { | ||
| 90 | 101 | size_t n = block_iter_prev_line_eol(bi); | |
| 91 |
2/2✓ Branch 3 → 4 taken 91 times.
✓ Branch 3 → 6 taken 10 times.
|
101 | return n ? n + block_iter_bol(bi) : 0; |
| 92 | } | ||
| 93 | |||
| 94 | 126 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
| 95 | { | ||
| 96 | 126 | BlockIter tmp = *bi; | |
| 97 | 126 | return block_iter_next_char(&tmp, up); | |
| 98 | } | ||
| 99 | |||
| 100 | 294 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
| 101 | { | ||
| 102 | 294 | size_t offset = bi->offset; | |
| 103 |
2/2✓ Branch 2 → 3 taken 17 times.
✓ Branch 2 → 5 taken 277 times.
|
294 | if (unlikely(offset == bi->blk->size)) { |
| 104 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 9 taken 17 times.
|
17 | if (unlikely(bi->blk->node.next == bi->head)) { |
| 105 | return 0; | ||
| 106 | } | ||
| 107 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
| 108 | ✗ | bi->offset = offset = 0; | |
| 109 | } | ||
| 110 | |||
| 111 | // Note: this block can't be empty | ||
| 112 | 277 | unsigned char byte = bi->blk->data[offset]; | |
| 113 |
2/2✓ Branch 5 → 6 taken 275 times.
✓ Branch 5 → 7 taken 2 times.
|
277 | if (likely(byte < 0x80)) { |
| 114 | 275 | *up = byte; | |
| 115 | 275 | bi->offset++; | |
| 116 | 275 | return 1; | |
| 117 | } | ||
| 118 | |||
| 119 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
| 120 | 2 | return bi->offset - offset; | |
| 121 | } | ||
| 122 | |||
| 123 | 195 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
| 124 | { | ||
| 125 | 195 | size_t offset = bi->offset; | |
| 126 |
2/2✓ Branch 2 → 3 taken 16 times.
✓ Branch 2 → 5 taken 179 times.
|
195 | if (unlikely(offset == 0)) { |
| 127 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 9 taken 16 times.
|
16 | if (unlikely(bi->blk->node.prev == bi->head)) { |
| 128 | return 0; | ||
| 129 | } | ||
| 130 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
| 131 | ✗ | bi->offset = offset = bi->blk->size; | |
| 132 | } | ||
| 133 | |||
| 134 | // Note: this block can't be empty | ||
| 135 | 179 | unsigned char byte = bi->blk->data[offset - 1]; | |
| 136 |
1/2✓ Branch 5 → 6 taken 179 times.
✗ Branch 5 → 7 not taken.
|
179 | if (likely(byte < 0x80)) { |
| 137 | 179 | *up = byte; | |
| 138 | 179 | bi->offset--; | |
| 139 | 179 | return 1; | |
| 140 | } | ||
| 141 | |||
| 142 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
| 143 | ✗ | return offset - bi->offset; | |
| 144 | } | ||
| 145 | |||
| 146 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
| 147 | { | ||
| 148 | 45 | CodePoint u; | |
| 149 | 45 | size_t size = block_iter_next_char(bi, &u); | |
| 150 |
3/4✓ Branch 7 → 8 taken 38 times.
✓ Branch 7 → 10 taken 7 times.
✗ Branch 9 → 4 not taken.
✓ Branch 9 → 10 taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
| 151 | ✗ | size += block_iter_next_char(bi, &u); | |
| 152 | } | ||
| 153 | 45 | return size; | |
| 154 | } | ||
| 155 | |||
| 156 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
| 157 | { | ||
| 158 | 16 | CodePoint u; | |
| 159 | 16 | size_t skip, total = 0; | |
| 160 | 16 | do { | |
| 161 | 16 | skip = block_iter_prev_char(bi, &u); | |
| 162 | 16 | total += skip; | |
| 163 |
3/4✓ Branch 4 → 5 taken 13 times.
✓ Branch 4 → 8 taken 3 times.
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
| 164 | 16 | return total; | |
| 165 | } | ||
| 166 | |||
| 167 | 436 | size_t block_iter_bol(BlockIter *bi) | |
| 168 | { | ||
| 169 | 436 | block_iter_normalize(bi); | |
| 170 | 436 | size_t offset = bi->offset; | |
| 171 |
2/2✓ Branch 3 → 4 taken 205 times.
✓ Branch 3 → 13 taken 231 times.
|
436 | if (block_iter_is_bol(bi)) { |
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | // These cases are handled by the condition above | ||
| 176 | 205 | const Block *blk = bi->blk; | |
| 177 | 205 | BUG_ON(offset == 0); | |
| 178 | 205 | BUG_ON(offset >= blk->size); | |
| 179 | |||
| 180 |
2/2✓ Branch 8 → 9 taken 31 times.
✓ Branch 8 → 10 taken 174 times.
|
205 | if (blk->nl == 1) { |
| 181 | 31 | bi->offset = 0; // Only 1 line in `blk`; bol is at offset 0 | |
| 182 | 31 | return offset; | |
| 183 | } | ||
| 184 | |||
| 185 | 174 | const char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
| 186 |
2/2✓ Branch 10 → 11 taken 45 times.
✓ Branch 10 → 12 taken 129 times.
|
174 | if (!nl) { |
| 187 | 45 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
| 188 | 45 | return offset; | |
| 189 | } | ||
| 190 | |||
| 191 | 129 | offset = (size_t)(nl - blk->data) + 1; | |
| 192 | 129 | size_t count = bi->offset - offset; | |
| 193 | 129 | bi->offset = offset; | |
| 194 | 129 | return count; | |
| 195 | } | ||
| 196 | |||
| 197 | 28 | size_t block_iter_eol(BlockIter *bi) | |
| 198 | { | ||
| 199 | 28 | block_iter_normalize(bi); | |
| 200 | 28 | const Block *blk = bi->blk; | |
| 201 | 28 | const size_t offset = bi->offset; | |
| 202 | |||
| 203 |
2/2✓ Branch 3 → 4 taken 25 times.
✓ Branch 3 → 8 taken 3 times.
|
28 | if (unlikely(offset == blk->size)) { |
| 204 | // Cursor at end of last block | ||
| 205 | return 0; | ||
| 206 | } | ||
| 207 | |||
| 208 |
2/2✓ Branch 4 → 5 taken 4 times.
✓ Branch 4 → 6 taken 21 times.
|
25 | if (blk->nl == 1) { |
| 209 | 4 | bi->offset = blk->size - 1; | |
| 210 | 4 | return bi->offset - offset; | |
| 211 | } | ||
| 212 | |||
| 213 | 21 | const char *end = block_memchr_eol(blk, offset); | |
| 214 | 21 | bi->offset = (size_t)(end - blk->data); | |
| 215 | 21 | return bi->offset - offset; | |
| 216 | } | ||
| 217 | |||
| 218 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
| 219 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
| 220 | { | ||
| 221 | 30 | block_iter_normalize(bi); | |
| 222 | 30 | const char *data = bi->blk->data; | |
| 223 | 30 | size_t count = 0; | |
| 224 | 30 | size_t i = bi->offset; | |
| 225 | |||
| 226 | // We're only operating on one line and checking for ASCII characters, | ||
| 227 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
| 228 |
1/2✓ Branch 6 → 4 taken 52 times.
✗ Branch 6 → 7 not taken.
|
52 | for (size_t n = bi->blk->size; i < n; count++) { |
| 229 | 52 | unsigned char c = data[i++]; | |
| 230 |
2/2✓ Branch 4 → 5 taken 22 times.
✓ Branch 4 → 7 taken 30 times.
|
52 | if (!ascii_isblank(c)) { |
| 231 | break; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | 30 | bi->offset = i; | |
| 236 | 30 | return count; | |
| 237 | } | ||
| 238 | |||
| 239 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
| 240 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
| 241 | |||
| 242 | { | ||
| 243 | 30 | block_iter_normalize(bi); | |
| 244 | 30 | size_t count = 0; | |
| 245 | 30 | size_t i = bi->offset; | |
| 246 | |||
| 247 |
1/2✓ Branch 6 → 4 taken 61 times.
✗ Branch 6 → 7 not taken.
|
61 | for (const char *data = bi->blk->data; i > 0; count++) { |
| 248 | 61 | unsigned char c = data[--i]; | |
| 249 |
2/2✓ Branch 4 → 5 taken 31 times.
✓ Branch 4 → 7 taken 30 times.
|
61 | if (!ascii_isblank(c)) { |
| 250 | i++; | ||
| 251 | break; | ||
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | 30 | bi->offset = i; | |
| 256 | 30 | return count; | |
| 257 | } | ||
| 258 | |||
| 259 | // Non-empty line can be used to determine size of indentation for the next line | ||
| 260 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
| 261 | { | ||
| 262 | 14 | block_iter_bol(bi); | |
| 263 | 18 | do { | |
| 264 | 18 | StringView line = block_iter_get_line(bi); | |
| 265 |
2/2✓ Branch 4 → 5 taken 14 times.
✓ Branch 4 → 6 taken 4 times.
|
18 | if (!strview_isblank(line)) { |
| 266 | 14 | return true; | |
| 267 | } | ||
| 268 |
1/2✓ Branch 7 → 3 taken 4 times.
✗ Branch 7 → 8 not taken.
|
4 | } while (block_iter_prev_line(bi)); |
| 269 | return false; | ||
| 270 | } | ||
| 271 | |||
| 272 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
| 273 | { | ||
| 274 |
1/2✗ Branch 4 → 3 not taken.
✓ Branch 4 → 5 taken 1 time.
|
1 | while (count > bi->offset) { |
| 275 | ✗ | count -= bi->offset; | |
| 276 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
| 277 | ✗ | bi->offset = bi->blk->size; | |
| 278 | } | ||
| 279 | 1 | bi->offset -= count; | |
| 280 | 1 | } | |
| 281 | |||
| 282 | 342 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
| 283 | { | ||
| 284 | 342 | size_t avail = bi->blk->size - bi->offset; | |
| 285 |
1/2✗ Branch 4 → 3 not taken.
✓ Branch 4 → 5 taken 342 times.
|
342 | while (count > avail) { |
| 286 | ✗ | count -= avail; | |
| 287 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
| 288 | ✗ | bi->offset = 0; | |
| 289 | ✗ | avail = bi->blk->size; | |
| 290 | } | ||
| 291 | 342 | bi->offset += count; | |
| 292 | 342 | } | |
| 293 | |||
| 294 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
| 295 | { | ||
| 296 | 63 | Block *blk; | |
| 297 |
1/2✓ Branch 6 → 3 taken 63 times.
✗ Branch 6 → 7 not taken.
|
63 | block_for_each(blk, bi->head) { |
| 298 |
1/2✓ Branch 3 → 4 taken 63 times.
✗ Branch 3 → 5 not taken.
|
63 | if (offset <= blk->size) { |
| 299 | 63 | bi->blk = blk; | |
| 300 | 63 | bi->offset = offset; | |
| 301 | 63 | return; | |
| 302 | } | ||
| 303 | ✗ | offset -= blk->size; | |
| 304 | } | ||
| 305 | } | ||
| 306 | |||
| 307 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
| 308 | { | ||
| 309 | 4 | Block *blk = BLOCK(bi->head->next); | |
| 310 | 4 | size_t nl = 0; | |
| 311 |
1/4✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 4 times.
✗ Branch 5 → 3 not taken.
✗ Branch 5 → 6 not taken.
|
4 | while (blk->node.next != bi->head && nl + blk->nl < line) { |
| 312 | ✗ | nl += blk->nl; | |
| 313 | ✗ | blk = BLOCK(blk->node.next); | |
| 314 | } | ||
| 315 | |||
| 316 | 4 | bi->blk = blk; | |
| 317 | 4 | bi->offset = 0; | |
| 318 |
2/2✓ Branch 10 → 7 taken 8 times.
✓ Branch 10 → 11 taken 3 times.
|
11 | while (nl < line) { |
| 319 |
2/2✓ Branch 8 → 9 taken 7 times.
✓ Branch 8 → 11 taken 1 time.
|
8 | if (!block_iter_eat_line(bi)) { |
| 320 | break; | ||
| 321 | } | ||
| 322 | 7 | nl++; | |
| 323 | } | ||
| 324 | 4 | } | |
| 325 | |||
| 326 | 337 | size_t block_iter_get_offset(const BlockIter *bi) | |
| 327 | { | ||
| 328 | 337 | const Block *blk; | |
| 329 | 337 | size_t offset = 0; | |
| 330 |
1/2✓ Branch 5 → 3 taken 337 times.
✗ Branch 5 → 6 not taken.
|
337 | block_for_each(blk, bi->head) { |
| 331 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 337 times.
|
337 | if (blk == bi->blk) { |
| 332 | break; | ||
| 333 | } | ||
| 334 | ✗ | offset += blk->size; | |
| 335 | } | ||
| 336 | 337 | return offset + bi->offset; | |
| 337 | } | ||
| 338 | |||
| 339 | 13 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | |
| 340 | { | ||
| 341 |
1/2✓ Branch 2 → 3 taken 13 times.
✗ Branch 2 → 10 not taken.
|
13 | if (len == 0) { |
| 342 | return NULL; | ||
| 343 | } | ||
| 344 | |||
| 345 | 13 | const Block *blk = bi->blk; | |
| 346 | 13 | size_t offset = bi->offset; | |
| 347 | 13 | size_t pos = 0; | |
| 348 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
| 349 | |||
| 350 |
2/2✓ Branch 9 → 5 taken 13 times.
✓ Branch 9 → 10 taken 13 times.
|
26 | while (pos < len) { |
| 351 | 13 | const size_t avail = blk->size - offset; | |
| 352 | 13 | size_t count = MIN(len - pos, avail); | |
| 353 | 13 | memcpy(buf + pos, blk->data + offset, count); | |
| 354 | 13 | pos += count; | |
| 355 | 13 | BUG_ON(pos < len && blk->node.next == bi->head); | |
| 356 | 13 | blk = BLOCK(blk->node.next); | |
| 357 | 13 | offset = 0; | |
| 358 | } | ||
| 359 | |||
| 360 | return buf; | ||
| 361 | } | ||
| 362 | |||
| 363 | // Return the contents of the line that extends from `bi`. Callers | ||
| 364 | // should ensure `bi` is already at BOL, if whole lines are needed. | ||
| 365 | 392 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
| 366 | { | ||
| 367 | 392 | block_iter_normalize(bi); | |
| 368 | 392 | const char *start = bi->blk->data + bi->offset; | |
| 369 | 392 | StringView line = {.data = start, .length = 0}; | |
| 370 | 392 | size_t max = bi->blk->size - bi->offset; | |
| 371 | |||
| 372 |
2/2✓ Branch 3 → 4 taken 10 times.
✓ Branch 3 → 5 taken 382 times.
|
392 | if (unlikely(max == 0)) { |
| 373 | // Cursor at end of last block | ||
| 374 | 10 | return line; | |
| 375 | } | ||
| 376 | |||
| 377 |
2/2✓ Branch 5 → 6 taken 48 times.
✓ Branch 5 → 9 taken 334 times.
|
382 | if (bi->blk->nl == 1) { |
| 378 | // Block contains only 1 line; end-of-line is end-of-block | ||
| 379 | 48 | BUG_ON(line.data[max - 1] != '\n'); | |
| 380 | 48 | line.length = max; | |
| 381 | 48 | return line; | |
| 382 | } | ||
| 383 | |||
| 384 | 334 | const char *nl = block_memchr_eol(bi->blk, bi->offset); | |
| 385 | 334 | line.length = (size_t)(nl - start + 1); | |
| 386 | 334 | BUG_ON(line.length == 0); | |
| 387 | 334 | return line; | |
| 388 | } | ||
| 389 |