| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <string.h> | ||
| 2 | #include "block-iter.h" | ||
| 3 | #include "util/ascii.h" | ||
| 4 | #include "util/debug.h" | ||
| 5 | #include "util/utf8.h" | ||
| 6 | #include "util/xmalloc.h" | ||
| 7 | #include "util/xmemrchr.h" | ||
| 8 | |||
| 9 | NONNULL_ARGS_AND_RETURN | ||
| 10 | 548 | static const char *block_memchr_eol(const Block *blk, size_t offset) | |
| 11 | { | ||
| 12 | 548 | BUG_ON(offset > blk->size); | |
| 13 | 548 | const char *eol = memchr(blk->data + offset, '\n', blk->size - offset); | |
| 14 | 548 | BUG_ON(!eol); | |
| 15 | 548 | return eol; | |
| 16 | } | ||
| 17 | |||
| 18 | // Move after next newline (beginning of next line or end of file) | ||
| 19 | // and return number of bytes moved | ||
| 20 | 135 | size_t block_iter_eat_line(BlockIter *bi) | |
| 21 | { | ||
| 22 | 135 | block_iter_normalize(bi); | |
| 23 | 135 | const size_t offset = bi->offset; | |
| 24 |
2/2✓ Branch 3 → 4 taken 132 times.
✓ Branch 3 → 9 taken 3 times.
|
135 | if (unlikely(offset == bi->blk->size)) { |
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | // There must be at least one newline | ||
| 29 |
2/2✓ Branch 4 → 5 taken 7 times.
✓ Branch 4 → 6 taken 125 times.
|
132 | if (bi->blk->nl == 1) { |
| 30 | 7 | bi->offset = bi->blk->size; | |
| 31 | } else { | ||
| 32 | 125 | const char *end = block_memchr_eol(bi->blk, offset); | |
| 33 | 125 | bi->offset = (size_t)(end + 1 - bi->blk->data); | |
| 34 | } | ||
| 35 | |||
| 36 | 132 | return bi->offset - offset; | |
| 37 | } | ||
| 38 | |||
| 39 | // Move to beginning of next line (if any) and return number of bytes moved | ||
| 40 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
| 41 | { | ||
| 42 | 79 | block_iter_normalize(bi); | |
| 43 | 79 | const size_t offset = bi->offset; | |
| 44 |
1/2✓ Branch 3 → 4 taken 79 times.
✗ Branch 3 → 10 not taken.
|
79 | if (unlikely(offset == bi->blk->size)) { |
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | // There must be at least one newline | ||
| 49 | 79 | size_t new_offset; | |
| 50 |
1/2✓ Branch 4 → 5 taken 79 times.
✗ Branch 4 → 7 not taken.
|
79 | if (bi->blk->nl == 1) { |
| 51 | new_offset = bi->blk->size; | ||
| 52 | } else { | ||
| 53 | 79 | const char *end = block_memchr_eol(bi->blk, offset); | |
| 54 | 79 | new_offset = (size_t)(end + 1 - bi->blk->data); | |
| 55 | } | ||
| 56 | |||
| 57 |
3/4✓ Branch 7 → 8 taken 15 times.
✓ Branch 7 → 9 taken 64 times.
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 15 times.
|
79 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { |
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | 64 | bi->offset = new_offset; | |
| 62 | 64 | return bi->offset - offset; | |
| 63 | } | ||
| 64 | |||
| 65 | // Move to beginning of previous line (if any) and return number of bytes moved | ||
| 66 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
| 67 | { | ||
| 68 | 101 | Block *blk = bi->blk; | |
| 69 | 101 | size_t offset = bi->offset; | |
| 70 | 101 | size_t start = offset; | |
| 71 | |||
| 72 |
4/4✓ Branch 3 → 4 taken 217 times.
✓ Branch 3 → 5 taken 10 times.
✓ Branch 4 → 3 taken 126 times.
✓ Branch 4 → 5 taken 91 times.
|
227 | while (offset && blk->data[offset - 1] != '\n') { |
| 73 | offset--; | ||
| 74 | } | ||
| 75 | |||
| 76 |
2/2✓ Branch 5 → 6 taken 10 times.
✓ Branch 5 → 8 taken 91 times.
|
101 | if (!offset) { |
| 77 |
1/2✗ Branch 6 → 7 not taken.
✓ Branch 6 → 12 taken 10 times.
|
10 | if (blk->node.prev == bi->head) { |
| 78 | return 0; | ||
| 79 | } | ||
| 80 | ✗ | bi->blk = blk = BLOCK(blk->node.prev); | |
| 81 | ✗ | offset = blk->size; | |
| 82 | ✗ | start += offset; | |
| 83 | } | ||
| 84 | |||
| 85 | 91 | offset--; | |
| 86 |
4/4✓ Branch 9 → 10 taken 922 times.
✓ Branch 9 → 11 taken 17 times.
✓ Branch 10 → 9 taken 848 times.
✓ Branch 10 → 11 taken 74 times.
|
939 | while (offset && blk->data[offset - 1] != '\n') { |
| 87 | offset--; | ||
| 88 | } | ||
| 89 | 91 | bi->offset = offset; | |
| 90 | 91 | return start - offset; | |
| 91 | } | ||
| 92 | |||
| 93 | 123 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
| 94 | { | ||
| 95 | 123 | BlockIter tmp = *bi; | |
| 96 | 123 | return block_iter_next_char(&tmp, up); | |
| 97 | } | ||
| 98 | |||
| 99 | 291 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
| 100 | { | ||
| 101 | 291 | size_t offset = bi->offset; | |
| 102 |
2/2✓ Branch 2 → 3 taken 17 times.
✓ Branch 2 → 5 taken 274 times.
|
291 | if (unlikely(offset == bi->blk->size)) { |
| 103 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 9 taken 17 times.
|
17 | if (unlikely(bi->blk->node.next == bi->head)) { |
| 104 | return 0; | ||
| 105 | } | ||
| 106 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
| 107 | ✗ | bi->offset = offset = 0; | |
| 108 | } | ||
| 109 | |||
| 110 | // Note: this block can't be empty | ||
| 111 | 274 | unsigned char byte = bi->blk->data[offset]; | |
| 112 |
2/2✓ Branch 5 → 6 taken 272 times.
✓ Branch 5 → 7 taken 2 times.
|
274 | if (likely(byte < 0x80)) { |
| 113 | 272 | *up = byte; | |
| 114 | 272 | bi->offset++; | |
| 115 | 272 | return 1; | |
| 116 | } | ||
| 117 | |||
| 118 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
| 119 | 2 | return bi->offset - offset; | |
| 120 | } | ||
| 121 | |||
| 122 | 94 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
| 123 | { | ||
| 124 | 94 | size_t offset = bi->offset; | |
| 125 |
2/2✓ Branch 2 → 3 taken 6 times.
✓ Branch 2 → 5 taken 88 times.
|
94 | if (unlikely(offset == 0)) { |
| 126 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 9 taken 6 times.
|
6 | if (unlikely(bi->blk->node.prev == bi->head)) { |
| 127 | return 0; | ||
| 128 | } | ||
| 129 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
| 130 | ✗ | bi->offset = offset = bi->blk->size; | |
| 131 | } | ||
| 132 | |||
| 133 | // Note: this block can't be empty | ||
| 134 | 88 | unsigned char byte = bi->blk->data[offset - 1]; | |
| 135 |
1/2✓ Branch 5 → 6 taken 88 times.
✗ Branch 5 → 7 not taken.
|
88 | if (likely(byte < 0x80)) { |
| 136 | 88 | *up = byte; | |
| 137 | 88 | bi->offset--; | |
| 138 | 88 | return 1; | |
| 139 | } | ||
| 140 | |||
| 141 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
| 142 | ✗ | return offset - bi->offset; | |
| 143 | } | ||
| 144 | |||
| 145 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
| 146 | { | ||
| 147 | 45 | CodePoint u; | |
| 148 | 45 | size_t size = block_iter_next_char(bi, &u); | |
| 149 |
3/4✓ Branch 7 → 8 taken 38 times.
✓ Branch 7 → 10 taken 7 times.
✗ Branch 9 → 4 not taken.
✓ Branch 9 → 10 taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
| 150 | ✗ | size += block_iter_next_char(bi, &u); | |
| 151 | } | ||
| 152 | 45 | return size; | |
| 153 | } | ||
| 154 | |||
| 155 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
| 156 | { | ||
| 157 | 16 | CodePoint u; | |
| 158 | 16 | size_t skip, total = 0; | |
| 159 | 16 | do { | |
| 160 | 16 | skip = block_iter_prev_char(bi, &u); | |
| 161 | 16 | total += skip; | |
| 162 |
3/4✓ Branch 4 → 5 taken 13 times.
✓ Branch 4 → 8 taken 3 times.
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
| 163 | 16 | return total; | |
| 164 | } | ||
| 165 | |||
| 166 | 233 | size_t block_iter_bol(BlockIter *bi) | |
| 167 | { | ||
| 168 | 233 | block_iter_normalize(bi); | |
| 169 | 233 | size_t offset = bi->offset; | |
| 170 |
2/2✓ Branch 3 → 4 taken 98 times.
✓ Branch 3 → 13 taken 135 times.
|
233 | if (block_iter_is_bol(bi)) { |
| 171 | return 0; | ||
| 172 | } | ||
| 173 | |||
| 174 | // These cases are handled by the condition above | ||
| 175 | 98 | const Block *blk = bi->blk; | |
| 176 | 98 | BUG_ON(offset == 0); | |
| 177 | 98 | BUG_ON(offset >= blk->size); | |
| 178 | |||
| 179 |
2/2✓ Branch 8 → 9 taken 30 times.
✓ Branch 8 → 10 taken 68 times.
|
98 | if (blk->nl == 1) { |
| 180 | 30 | bi->offset = 0; // Only 1 line in `blk`; bol is at offset 0 | |
| 181 | 30 | return offset; | |
| 182 | } | ||
| 183 | |||
| 184 | 68 | const char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
| 185 |
2/2✓ Branch 10 → 11 taken 26 times.
✓ Branch 10 → 12 taken 42 times.
|
68 | if (!nl) { |
| 186 | 26 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
| 187 | 26 | return offset; | |
| 188 | } | ||
| 189 | |||
| 190 | 42 | offset = (size_t)(nl - blk->data) + 1; | |
| 191 | 42 | size_t count = bi->offset - offset; | |
| 192 | 42 | bi->offset = offset; | |
| 193 | 42 | return count; | |
| 194 | } | ||
| 195 | |||
| 196 | 28 | size_t block_iter_eol(BlockIter *bi) | |
| 197 | { | ||
| 198 | 28 | block_iter_normalize(bi); | |
| 199 | 28 | const Block *blk = bi->blk; | |
| 200 | 28 | const size_t offset = bi->offset; | |
| 201 | |||
| 202 |
2/2✓ Branch 3 → 4 taken 25 times.
✓ Branch 3 → 8 taken 3 times.
|
28 | if (unlikely(offset == blk->size)) { |
| 203 | // Cursor at end of last block | ||
| 204 | return 0; | ||
| 205 | } | ||
| 206 | |||
| 207 |
2/2✓ Branch 4 → 5 taken 4 times.
✓ Branch 4 → 6 taken 21 times.
|
25 | if (blk->nl == 1) { |
| 208 | 4 | bi->offset = blk->size - 1; | |
| 209 | 4 | return bi->offset - offset; | |
| 210 | } | ||
| 211 | |||
| 212 | 21 | const char *end = block_memchr_eol(blk, offset); | |
| 213 | 21 | bi->offset = (size_t)(end - blk->data); | |
| 214 | 21 | return bi->offset - offset; | |
| 215 | } | ||
| 216 | |||
| 217 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
| 218 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
| 219 | { | ||
| 220 | 30 | block_iter_normalize(bi); | |
| 221 | 30 | const char *data = bi->blk->data; | |
| 222 | 30 | size_t count = 0; | |
| 223 | 30 | size_t i = bi->offset; | |
| 224 | |||
| 225 | // We're only operating on one line and checking for ASCII characters, | ||
| 226 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
| 227 |
1/2✓ Branch 6 → 4 taken 52 times.
✗ Branch 6 → 7 not taken.
|
52 | for (size_t n = bi->blk->size; i < n; count++) { |
| 228 | 52 | unsigned char c = data[i++]; | |
| 229 |
2/2✓ Branch 4 → 5 taken 22 times.
✓ Branch 4 → 7 taken 30 times.
|
52 | if (!ascii_isblank(c)) { |
| 230 | break; | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | 30 | bi->offset = i; | |
| 235 | 30 | return count; | |
| 236 | } | ||
| 237 | |||
| 238 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
| 239 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
| 240 | |||
| 241 | { | ||
| 242 | 30 | block_iter_normalize(bi); | |
| 243 | 30 | size_t count = 0; | |
| 244 | 30 | size_t i = bi->offset; | |
| 245 | |||
| 246 |
1/2✓ Branch 6 → 4 taken 61 times.
✗ Branch 6 → 7 not taken.
|
61 | for (const char *data = bi->blk->data; i > 0; count++) { |
| 247 | 61 | unsigned char c = data[--i]; | |
| 248 |
2/2✓ Branch 4 → 5 taken 31 times.
✓ Branch 4 → 7 taken 30 times.
|
61 | if (!ascii_isblank(c)) { |
| 249 | i++; | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | 30 | bi->offset = i; | |
| 255 | 30 | return count; | |
| 256 | } | ||
| 257 | |||
| 258 | // Non-empty line can be used to determine size of indentation for the next line | ||
| 259 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
| 260 | { | ||
| 261 | 14 | block_iter_bol(bi); | |
| 262 | 18 | do { | |
| 263 | 18 | StringView line = block_iter_get_line(bi); | |
| 264 |
2/2✓ Branch 4 → 5 taken 14 times.
✓ Branch 4 → 6 taken 4 times.
|
18 | if (!strview_isblank(line)) { |
| 265 | 14 | return true; | |
| 266 | } | ||
| 267 |
1/2✓ Branch 7 → 3 taken 4 times.
✗ Branch 7 → 8 not taken.
|
4 | } while (block_iter_prev_line(bi)); |
| 268 | return false; | ||
| 269 | } | ||
| 270 | |||
| 271 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
| 272 | { | ||
| 273 |
1/2✗ Branch 4 → 3 not taken.
✓ Branch 4 → 5 taken 1 time.
|
1 | while (count > bi->offset) { |
| 274 | ✗ | count -= bi->offset; | |
| 275 | ✗ | bi->blk = BLOCK(bi->blk->node.prev); | |
| 276 | ✗ | bi->offset = bi->blk->size; | |
| 277 | } | ||
| 278 | 1 | bi->offset -= count; | |
| 279 | 1 | } | |
| 280 | |||
| 281 | 291 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
| 282 | { | ||
| 283 | 291 | size_t avail = bi->blk->size - bi->offset; | |
| 284 |
1/2✗ Branch 4 → 3 not taken.
✓ Branch 4 → 5 taken 291 times.
|
291 | while (count > avail) { |
| 285 | ✗ | count -= avail; | |
| 286 | ✗ | bi->blk = BLOCK(bi->blk->node.next); | |
| 287 | ✗ | bi->offset = 0; | |
| 288 | ✗ | avail = bi->blk->size; | |
| 289 | } | ||
| 290 | 291 | bi->offset += count; | |
| 291 | 291 | } | |
| 292 | |||
| 293 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
| 294 | { | ||
| 295 | 63 | Block *blk; | |
| 296 |
1/2✓ Branch 6 → 3 taken 63 times.
✗ Branch 6 → 7 not taken.
|
63 | block_for_each(blk, bi->head) { |
| 297 |
1/2✓ Branch 3 → 4 taken 63 times.
✗ Branch 3 → 5 not taken.
|
63 | if (offset <= blk->size) { |
| 298 | 63 | bi->blk = blk; | |
| 299 | 63 | bi->offset = offset; | |
| 300 | 63 | return; | |
| 301 | } | ||
| 302 | ✗ | offset -= blk->size; | |
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
| 307 | { | ||
| 308 | 4 | Block *blk = BLOCK(bi->head->next); | |
| 309 | 4 | size_t nl = 0; | |
| 310 |
1/4✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 4 times.
✗ Branch 5 → 3 not taken.
✗ Branch 5 → 6 not taken.
|
4 | while (blk->node.next != bi->head && nl + blk->nl < line) { |
| 311 | ✗ | nl += blk->nl; | |
| 312 | ✗ | blk = BLOCK(blk->node.next); | |
| 313 | } | ||
| 314 | |||
| 315 | 4 | bi->blk = blk; | |
| 316 | 4 | bi->offset = 0; | |
| 317 |
2/2✓ Branch 10 → 7 taken 8 times.
✓ Branch 10 → 11 taken 3 times.
|
11 | while (nl < line) { |
| 318 |
2/2✓ Branch 8 → 9 taken 7 times.
✓ Branch 8 → 11 taken 1 time.
|
8 | if (!block_iter_eat_line(bi)) { |
| 319 | break; | ||
| 320 | } | ||
| 321 | 7 | nl++; | |
| 322 | } | ||
| 323 | 4 | } | |
| 324 | |||
| 325 | 328 | size_t block_iter_get_offset(const BlockIter *bi) | |
| 326 | { | ||
| 327 | 328 | const Block *blk; | |
| 328 | 328 | size_t offset = 0; | |
| 329 |
1/2✓ Branch 5 → 3 taken 328 times.
✗ Branch 5 → 6 not taken.
|
328 | block_for_each(blk, bi->head) { |
| 330 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 328 times.
|
328 | if (blk == bi->blk) { |
| 331 | break; | ||
| 332 | } | ||
| 333 | ✗ | offset += blk->size; | |
| 334 | } | ||
| 335 | 328 | return offset + bi->offset; | |
| 336 | } | ||
| 337 | |||
| 338 | 13 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | |
| 339 | { | ||
| 340 |
1/2✓ Branch 2 → 3 taken 13 times.
✗ Branch 2 → 10 not taken.
|
13 | if (len == 0) { |
| 341 | return NULL; | ||
| 342 | } | ||
| 343 | |||
| 344 | 13 | const Block *blk = bi->blk; | |
| 345 | 13 | size_t offset = bi->offset; | |
| 346 | 13 | size_t pos = 0; | |
| 347 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
| 348 | |||
| 349 |
2/2✓ Branch 9 → 5 taken 13 times.
✓ Branch 9 → 10 taken 13 times.
|
26 | while (pos < len) { |
| 350 | 13 | const size_t avail = blk->size - offset; | |
| 351 | 13 | size_t count = MIN(len - pos, avail); | |
| 352 | 13 | memcpy(buf + pos, blk->data + offset, count); | |
| 353 | 13 | pos += count; | |
| 354 | 13 | BUG_ON(pos < len && blk->node.next == bi->head); | |
| 355 | 13 | blk = BLOCK(blk->node.next); | |
| 356 | 13 | offset = 0; | |
| 357 | } | ||
| 358 | |||
| 359 | return buf; | ||
| 360 | } | ||
| 361 | |||
| 362 | // Return the contents of the line that extends from `bi`. Callers | ||
| 363 | // should ensure `bi` is already at BOL, if whole lines are needed. | ||
| 364 | 381 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
| 365 | { | ||
| 366 | 381 | block_iter_normalize(bi); | |
| 367 | 381 | const char *start = bi->blk->data + bi->offset; | |
| 368 | 381 | StringView line = {.data = start, .length = 0}; | |
| 369 | 381 | size_t max = bi->blk->size - bi->offset; | |
| 370 | |||
| 371 |
2/2✓ Branch 3 → 4 taken 10 times.
✓ Branch 3 → 5 taken 371 times.
|
381 | if (unlikely(max == 0)) { |
| 372 | // Cursor at end of last block | ||
| 373 | 10 | return line; | |
| 374 | } | ||
| 375 | |||
| 376 |
2/2✓ Branch 5 → 6 taken 48 times.
✓ Branch 5 → 9 taken 323 times.
|
371 | if (bi->blk->nl == 1) { |
| 377 | // Block contains only 1 line; end-of-line is end-of-block | ||
| 378 | 48 | BUG_ON(line.data[max - 1] != '\n'); | |
| 379 | 48 | line.length = max; | |
| 380 | 48 | return line; | |
| 381 | } | ||
| 382 | |||
| 383 | 323 | const char *nl = block_memchr_eol(bi->blk, bi->offset); | |
| 384 | 323 | line.length = (size_t)(nl - start + 1); | |
| 385 | 323 | BUG_ON(line.length == 0); | |
| 386 | 323 | return line; | |
| 387 | } | ||
| 388 |