src/block-iter.c
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <string.h> | ||
| 2 | #include "block-iter.h" | ||
| 3 | #include "util/ascii.h" | ||
| 4 | #include "util/debug.h" | ||
| 5 | #include "util/str-util.h" | ||
| 6 | #include "util/utf8.h" | ||
| 7 | #include "util/xmalloc.h" | ||
| 8 | #include "util/xmemrchr.h" | ||
| 9 | |||
| 10 | // Move to end of previous line (if any) and return number of bytes moved | ||
| 11 | 101 | static size_t block_iter_prev_line_eol(BlockIter *bi) | |
| 12 | { | ||
| 13 | 101 | BlockIter tmp = *bi; | |
| 14 | 101 | size_t n = block_iter_bol(&tmp); | |
| 15 |
2/2✓ Branch 3 → 4 taken 91 times.
✓ Branch 3 → 6 taken 10 times.
|
101 | if (block_iter_is_bof(&tmp)) { |
| 16 | return 0; // Already on first line; leave `bi` unchanged | ||
| 17 | } | ||
| 18 | |||
| 19 | 91 | CodePoint u; | |
| 20 | 91 | *bi = tmp; | |
| 21 | 91 | return n + block_iter_prev_char(bi, &u); | |
| 22 | } | ||
| 23 | |||
| 24 | // Move to beginning of previous line (if any) and return number of bytes moved | ||
| 25 | 101 | size_t block_iter_prev_line(BlockIter *bi) | |
| 26 | { | ||
| 27 | 101 | size_t n = block_iter_prev_line_eol(bi); | |
| 28 |
2/2✓ Branch 3 → 4 taken 91 times.
✓ Branch 3 → 6 taken 10 times.
|
101 | return n ? n + block_iter_bol(bi) : 0; |
| 29 | } | ||
| 30 | |||
| 31 | 126 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | |
| 32 | { | ||
| 33 | 126 | BlockIter tmp = *bi; | |
| 34 | 126 | return block_iter_next_char(&tmp, up); | |
| 35 | } | ||
| 36 | |||
| 37 | 522 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | |
| 38 | { | ||
| 39 |
3/4✓ Branch 2 → 3 taken 20 times.
✓ Branch 2 → 5 taken 502 times.
✗ Branch 4 → 5 not taken.
✓ Branch 4 → 11 taken 20 times.
|
522 | if (unlikely(bi->offset == bi->blk->size && !block_iter_next_block(bi))) { |
| 40 | return 0; // Already at EOF | ||
| 41 | } | ||
| 42 | |||
| 43 | 502 | BUG_ON(bi->blk->size == 0); // This block can't be empty | |
| 44 | |||
| 45 | 502 | unsigned char byte = bi->blk->data[bi->offset]; | |
| 46 |
2/2✓ Branch 7 → 8 taken 500 times.
✓ Branch 7 → 9 taken 2 times.
|
502 | if (likely(byte < 0x80)) { |
| 47 | 500 | *up = byte; | |
| 48 | 500 | bi->offset++; | |
| 49 | 500 | return 1; | |
| 50 | } | ||
| 51 | |||
| 52 | 2 | size_t prev_offset = bi->offset; | |
| 53 | 2 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | |
| 54 | 2 | return bi->offset - prev_offset; | |
| 55 | } | ||
| 56 | |||
| 57 | 185 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | |
| 58 | { | ||
| 59 |
3/4✓ Branch 2 → 3 taken 6 times.
✓ Branch 2 → 5 taken 179 times.
✗ Branch 4 → 5 not taken.
✓ Branch 4 → 13 taken 6 times.
|
185 | if (unlikely(bi->offset == 0 && !block_iter_end_of_prev_block(bi))) { |
| 60 | return 0; // Already at BOF | ||
| 61 | } | ||
| 62 | |||
| 63 | 179 | BUG_ON(bi->blk->size == 0); // This block can't be empty | |
| 64 | 179 | BUG_ON(bi->offset == 0); | |
| 65 | |||
| 66 | 179 | unsigned char byte = bi->blk->data[bi->offset - 1]; | |
| 67 |
1/2✓ Branch 9 → 10 taken 179 times.
✗ Branch 9 → 11 not taken.
|
179 | if (likely(byte < 0x80)) { |
| 68 | 179 | *up = byte; | |
| 69 | 179 | bi->offset--; | |
| 70 | 179 | return 1; | |
| 71 | } | ||
| 72 | |||
| 73 | ✗ | size_t prev_offset = bi->offset; | |
| 74 | ✗ | *up = u_prev_char(bi->blk->data, &bi->offset); | |
| 75 | ✗ | return prev_offset - bi->offset; | |
| 76 | } | ||
| 77 | |||
| 78 | 45 | size_t block_iter_next_column(BlockIter *bi) | |
| 79 | { | ||
| 80 | 45 | CodePoint u; | |
| 81 | 45 | size_t size = block_iter_next_char(bi, &u); | |
| 82 |
3/4✓ Branch 7 → 8 taken 38 times.
✓ Branch 7 → 10 taken 7 times.
✗ Branch 9 → 4 not taken.
✓ Branch 9 → 10 taken 38 times.
|
45 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { |
| 83 | ✗ | size += block_iter_next_char(bi, &u); | |
| 84 | } | ||
| 85 | 45 | return size; | |
| 86 | } | ||
| 87 | |||
| 88 | 16 | size_t block_iter_prev_column(BlockIter *bi) | |
| 89 | { | ||
| 90 | 16 | CodePoint u; | |
| 91 | 16 | size_t skip, total = 0; | |
| 92 | 16 | do { | |
| 93 | 16 | skip = block_iter_prev_char(bi, &u); | |
| 94 | 16 | total += skip; | |
| 95 |
3/4✓ Branch 4 → 5 taken 13 times.
✓ Branch 4 → 8 taken 3 times.
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 13 times.
|
16 | } while (skip && u_is_zero_width(u)); |
| 96 | 16 | return total; | |
| 97 | } | ||
| 98 | |||
| 99 | 437 | size_t block_iter_bol(BlockIter *bi) | |
| 100 | { | ||
| 101 | 437 | block_iter_normalize(bi); | |
| 102 |
2/2✓ Branch 3 → 4 taken 206 times.
✓ Branch 3 → 13 taken 231 times.
|
437 | if (block_iter_is_bol(bi)) { |
| 103 | return 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | // These cases are handled by the condition above | ||
| 107 | 206 | const Block *blk = bi->blk; | |
| 108 | 206 | size_t offset = bi->offset; | |
| 109 | 206 | BUG_ON(offset == 0); | |
| 110 | 206 | BUG_ON(offset >= blk->size); | |
| 111 | |||
| 112 |
2/2✓ Branch 8 → 9 taken 31 times.
✓ Branch 8 → 10 taken 175 times.
|
206 | if (blk->nl == 1) { |
| 113 | 31 | bi->offset = 0; // Only 1 line in Block; bol is at offset 0 | |
| 114 | 31 | return offset; | |
| 115 | } | ||
| 116 | |||
| 117 | 175 | const char *nl = xmemrchr(blk->data, '\n', offset - 1); | |
| 118 |
2/2✓ Branch 10 → 11 taken 45 times.
✓ Branch 10 → 12 taken 130 times.
|
175 | if (!nl) { |
| 119 | 45 | bi->offset = 0; // No newline before offset; bol is at offset 0 | |
| 120 | 45 | return offset; | |
| 121 | } | ||
| 122 | |||
| 123 | 130 | offset = (size_t)(nl - blk->data) + 1; | |
| 124 | 130 | size_t count = bi->offset - offset; | |
| 125 | 130 | bi->offset = offset; | |
| 126 | 130 | return count; | |
| 127 | } | ||
| 128 | |||
| 129 | 247 | size_t block_iter_eol(BlockIter *bi) | |
| 130 | { | ||
| 131 | 247 | block_iter_normalize(bi); | |
| 132 | 247 | const Block *blk = bi->blk; | |
| 133 | 247 | size_t offset = bi->offset; | |
| 134 |
2/2✓ Branch 3 → 4 taken 241 times.
✓ Branch 3 → 12 taken 6 times.
|
247 | if (unlikely(offset == blk->size)) { |
| 135 | return 0; // Already at EOF | ||
| 136 | } | ||
| 137 | |||
| 138 | 241 | BUG_ON(blk->size == 0); // This block can't be empty | |
| 139 | 241 | BUG_ON(blk->nl == 0); | |
| 140 | |||
| 141 |
2/2✓ Branch 8 → 9 taken 11 times.
✓ Branch 8 → 10 taken 230 times.
|
241 | if (blk->nl == 1) { |
| 142 | 11 | bi->offset = blk->size - 1; | |
| 143 | 11 | return bi->offset - offset; | |
| 144 | } | ||
| 145 | |||
| 146 | 230 | StringView line = buf_slice_next_line(blk->data, &offset, blk->size); | |
| 147 | 230 | bi->offset += line.length; | |
| 148 | 230 | return line.length; | |
| 149 | } | ||
| 150 | |||
| 151 | // Move after next newline (beginning of next line or end of file) and | ||
| 152 | // return number of bytes moved | ||
| 153 | 228 | size_t block_iter_eat_line(BlockIter *bi) | |
| 154 | { | ||
| 155 | 228 | CodePoint u; | |
| 156 | 228 | size_t n = block_iter_eol(bi); | |
| 157 | 228 | size_t m = block_iter_next_char(bi, &u); | |
| 158 | 228 | BUG_ON(m && (m != 1 || u != '\n')); | |
| 159 | 228 | return n + m; | |
| 160 | } | ||
| 161 | |||
| 162 | // Move to beginning of next line (if any) and return number of bytes moved | ||
| 163 | 79 | size_t block_iter_next_line(BlockIter *bi) | |
| 164 | { | ||
| 165 | 79 | BlockIter tmp = *bi; | |
| 166 | 79 | size_t move = block_iter_eat_line(&tmp); | |
| 167 |
2/2✓ Branch 3 → 4 taken 64 times.
✓ Branch 3 → 5 taken 15 times.
|
79 | if (unlikely(block_iter_is_eof(&tmp))) { |
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 171 | 64 | *bi = tmp; | |
| 172 | 64 | return move; | |
| 173 | } | ||
| 174 | |||
| 175 | // Count spaces and tabs at or after iterator (and move beyond them) | ||
| 176 | 30 | size_t block_iter_skip_blanks_fwd(BlockIter *bi) | |
| 177 | { | ||
| 178 | 30 | block_iter_normalize(bi); | |
| 179 | 30 | StringView sv = strview_from_slice(bi->blk->data, bi->offset, bi->blk->size); | |
| 180 | |||
| 181 | // We're only operating on one line and checking for ASCII characters, | ||
| 182 | // so Block traversal and Unicode-aware decoding are both unnecessary | ||
| 183 | 30 | size_t count = strview_blank_prefix_length(sv); | |
| 184 | |||
| 185 | 30 | bi->offset += count + 1; | |
| 186 | 30 | return count; | |
| 187 | } | ||
| 188 | |||
| 189 | // Count spaces and tabs before iterator (and move to beginning of them) | ||
| 190 | 30 | size_t block_iter_skip_blanks_bwd(BlockIter *bi) | |
| 191 | { | ||
| 192 | 30 | block_iter_normalize(bi); | |
| 193 | 30 | StringView sv = string_view(bi->blk->data, bi->offset); | |
| 194 | 30 | size_t count = strview_blank_suffix_length(sv); | |
| 195 | 30 | bi->offset -= count; | |
| 196 | 30 | return count; | |
| 197 | } | ||
| 198 | |||
| 199 | // Non-empty line can be used to determine size of indentation for the next line | ||
| 200 | 14 | bool block_iter_find_non_empty_line_bwd(BlockIter *bi) | |
| 201 | { | ||
| 202 | 14 | block_iter_bol(bi); | |
| 203 | 18 | do { | |
| 204 | 18 | StringView line = block_iter_get_line(bi); | |
| 205 |
2/2✓ Branch 5 → 6 taken 14 times.
✓ Branch 5 → 7 taken 4 times.
|
18 | if (!strview_isblank(line)) { |
| 206 | 14 | return true; | |
| 207 | } | ||
| 208 |
1/2✓ Branch 8 → 3 taken 4 times.
✗ Branch 8 → 9 not taken.
|
4 | } while (block_iter_prev_line(bi)); |
| 209 | return false; | ||
| 210 | } | ||
| 211 | |||
| 212 | 1 | void block_iter_back_bytes(BlockIter *bi, size_t count) | |
| 213 | { | ||
| 214 | 1 | while (count > bi->offset) { | |
| 215 | ✗ | count -= bi->offset; | |
| 216 | ✗ | bool have_prev_block = block_iter_end_of_prev_block(bi); | |
| 217 | 1 | BUG_ON(!have_prev_block); | |
| 218 | } | ||
| 219 | 1 | bi->offset -= count; | |
| 220 | 1 | } | |
| 221 | |||
| 222 | 334 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | |
| 223 | { | ||
| 224 | 334 | size_t avail = bi->blk->size - bi->offset; | |
| 225 |
1/2✗ Branch 7 → 3 not taken.
✓ Branch 7 → 8 taken 334 times.
|
334 | while (count > avail) { |
| 226 | ✗ | count -= avail; | |
| 227 | ✗ | bool have_next_block = block_iter_next_block(bi); | |
| 228 | ✗ | BUG_ON(!have_next_block); | |
| 229 | ✗ | avail = bi->blk->size; | |
| 230 | } | ||
| 231 | 334 | bi->offset += count; | |
| 232 | 334 | } | |
| 233 | |||
| 234 | 63 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | |
| 235 | { | ||
| 236 | 63 | Block *blk; | |
| 237 |
1/2✓ Branch 6 → 3 taken 63 times.
✗ Branch 6 → 7 not taken.
|
63 | block_for_each(blk, bi->head) { |
| 238 |
1/2✓ Branch 3 → 4 taken 63 times.
✗ Branch 3 → 5 not taken.
|
63 | if (offset <= blk->size) { |
| 239 | 63 | bi->blk = blk; | |
| 240 | 63 | bi->offset = offset; | |
| 241 | 63 | return; | |
| 242 | } | ||
| 243 | ✗ | offset -= blk->size; | |
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | 4 | void block_iter_goto_line(BlockIter *bi, size_t line) | |
| 248 | { | ||
| 249 | 4 | Block *blk = block_iter_get_first_block(bi); | |
| 250 | 4 | size_t nl = 0; | |
| 251 |
1/4✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 4 times.
✗ Branch 5 → 3 not taken.
✗ Branch 5 → 6 not taken.
|
4 | while (block_has_next(blk, bi->head) && nl + blk->nl < line) { |
| 252 | ✗ | nl += blk->nl; | |
| 253 | ✗ | blk = block_next(blk); | |
| 254 | } | ||
| 255 | |||
| 256 | 4 | bi->blk = blk; | |
| 257 | 4 | bi->offset = 0; | |
| 258 |
2/2✓ Branch 10 → 7 taken 8 times.
✓ Branch 10 → 11 taken 3 times.
|
11 | while (nl < line) { |
| 259 |
2/2✓ Branch 8 → 9 taken 7 times.
✓ Branch 8 → 11 taken 1 time.
|
8 | if (!block_iter_eat_line(bi)) { |
| 260 | break; | ||
| 261 | } | ||
| 262 | 7 | nl++; | |
| 263 | } | ||
| 264 | 4 | } | |
| 265 | |||
| 266 | 338 | size_t block_iter_get_offset(const BlockIter *bi) | |
| 267 | { | ||
| 268 | 338 | const Block *blk; | |
| 269 | 338 | size_t offset = 0; | |
| 270 |
1/2✓ Branch 5 → 3 taken 338 times.
✗ Branch 5 → 6 not taken.
|
338 | block_for_each(blk, bi->head) { |
| 271 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 338 times.
|
338 | if (blk == bi->blk) { |
| 272 | break; | ||
| 273 | } | ||
| 274 | ✗ | offset += blk->size; | |
| 275 | } | ||
| 276 | 338 | return offset + bi->offset; | |
| 277 | } | ||
| 278 | |||
| 279 | 13 | char *block_iter_get_bytes(BlockIter bi, size_t len) | |
| 280 | { | ||
| 281 |
1/2✓ Branch 2 → 3 taken 13 times.
✗ Branch 2 → 9 not taken.
|
13 | if (len == 0) { |
| 282 | return NULL; | ||
| 283 | } | ||
| 284 | |||
| 285 | 13 | size_t pos = 0; | |
| 286 | 13 | char *buf = xmalloc(len + 1); // +1 byte; so expand_word() can append '\0' | |
| 287 | |||
| 288 | 13 | while (pos < len) { | |
| 289 | 13 | const size_t avail = bi.blk->size - bi.offset; | |
| 290 | 13 | size_t count = MIN(len - pos, avail); | |
| 291 | 13 | memcpy(buf + pos, bi.blk->data + bi.offset, count); | |
| 292 | 13 | pos += count; | |
| 293 | 13 | bool have_next_block = block_iter_next_block(&bi); | |
| 294 | 26 | BUG_ON(pos < len && !have_next_block); | |
| 295 | } | ||
| 296 | |||
| 297 | return buf; | ||
| 298 | } | ||
| 299 | |||
| 300 | // Return the contents of the line that extends from `bi`. Callers | ||
| 301 | // should ensure `bi` is already at BOL, if whole lines are needed. | ||
| 302 | 393 | StringView block_iter_get_line_with_nl(BlockIter *bi) | |
| 303 | { | ||
| 304 | 393 | block_iter_normalize(bi); | |
| 305 |
2/2✓ Branch 3 → 4 taken 10 times.
✓ Branch 3 → 5 taken 383 times.
|
393 | if (unlikely(bi->offset == bi->blk->size)) { |
| 306 | // Cursor at end of last block | ||
| 307 | 10 | return strview(""); | |
| 308 | } | ||
| 309 | |||
| 310 | 383 | StringView line; | |
| 311 |
2/2✓ Branch 5 → 6 taken 48 times.
✓ Branch 5 → 7 taken 335 times.
|
383 | if (bi->blk->nl == 1) { |
| 312 | // Block contains only 1 line; end-of-line is end-of-block | ||
| 313 | 48 | line = strview_from_slice(bi->blk->data, bi->offset, bi->blk->size); | |
| 314 | } else { | ||
| 315 | 335 | size_t pos = bi->offset; | |
| 316 | 335 | line = buf_slice_next_line(bi->blk->data, &pos, bi->blk->size); | |
| 317 | 335 | line.length += 1; // Include the newline | |
| 318 | } | ||
| 319 | |||
| 320 | 383 | BUG_ON(!strview_has_suffix(line, "\n")); | |
| 321 | 383 | return line; | |
| 322 | } | ||
| 323 |