| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <errno.h> | ||
| 2 | #include <inttypes.h> | ||
| 3 | #include <stdlib.h> | ||
| 4 | #include <string.h> | ||
| 5 | #include "convert.h" | ||
| 6 | #include "block.h" | ||
| 7 | #include "buildvar-iconv.h" | ||
| 8 | #include "encoding.h" | ||
| 9 | #include "util/arith.h" | ||
| 10 | #include "util/debug.h" | ||
| 11 | #include "util/list.h" | ||
| 12 | #include "util/log.h" | ||
| 13 | #include "util/str-util.h" | ||
| 14 | #include "util/utf8.h" | ||
| 15 | #include "util/xmalloc.h" | ||
| 16 | #include "util/xreadwrite.h" | ||
| 17 | |||
enum {
    // Longest line (in bytes) that can be read from a file while syntax
    // highlighting stays enabled; a longer line switches it off
    SYN_HIGHLIGHT_MAX_LINE_LEN = 512u * 1024u, // 512KiB
};
| 23 | |||
// Read cursor over the raw bytes of a file that is being loaded
typedef struct {
    // Input bytes; only ever read through this struct
    const char *ibuf;
    // Offset (in bytes) of the next unread byte of ibuf
    ssize_t ipos;
    // Total number of bytes available in ibuf
    ssize_t isize;
    // Conversion state used when the input is not UTF-8; left NULL by
    // the plain UTF-8 reader
    struct cconv *cconv;
} FileDecoder;
| 30 | |||
| 31 | 56 | static void add_block(Buffer *buffer, Block *blk) | |
| 32 | { | ||
| 33 | 56 | buffer->nl += blk->nl; | |
| 34 | 56 | list_insert_before(&blk->node, &buffer->blocks); | |
| 35 | 56 | } | |
| 36 | |||
| 37 | 7636 | static Block *add_utf8_line ( | |
| 38 | Buffer *buffer, | ||
| 39 | Block *blk, | ||
| 40 | const char *line, | ||
| 41 | size_t len | ||
| 42 | ) { | ||
| 43 | 7636 | size_t size = len + 1; | |
| 44 |
2/2✓ Branch 0 (2→3) taken 7608 times.
✓ Branch 1 (2→6) taken 28 times.
|
7636 | if (blk) { |
| 45 | 7608 | size_t avail = blk->alloc - blk->size; | |
| 46 |
2/2✓ Branch 0 (3→4) taken 7580 times.
✓ Branch 1 (3→5) taken 28 times.
|
7608 | if (size <= avail) { |
| 47 | 7580 | goto copy; | |
| 48 | } | ||
| 49 | 28 | add_block(buffer, blk); | |
| 50 | } | ||
| 51 | 56 | size = MAX(size, 8192); | |
| 52 | 56 | blk = block_new(size); | |
| 53 | |||
| 54 | 7636 | copy: | |
| 55 |
1/4✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→11) taken 7636 times.
✗ Branch 2 (8→9) not taken.
✗ Branch 3 (8→11) not taken.
|
7636 | if (unlikely(len > SYN_HIGHLIGHT_MAX_LINE_LEN && buffer->options.syntax)) { |
| 56 | // TODO: Make the limit configurable and add documentation | ||
| 57 | // TODO: Pass in an ErrorBuffer* and use error_msg() instead of LOG_NOTICE() | ||
| 58 | ✗ | LOG_NOTICE ( | |
| 59 | "line length (%zu) exceeded limit (%ju); disabling syntax highlighting", | ||
| 60 | len, (uintmax_t)SYN_HIGHLIGHT_MAX_LINE_LEN | ||
| 61 | ); | ||
| 62 | ✗ | buffer->options.syntax = false; | |
| 63 | } | ||
| 64 | |||
| 65 | 7636 | memcpy(blk->data + blk->size, line, len); | |
| 66 | 7636 | blk->size += len; | |
| 67 | 7636 | blk->data[blk->size++] = '\n'; | |
| 68 | 7636 | blk->nl++; | |
| 69 | 7636 | return blk; | |
| 70 | } | ||
| 71 | |||
| 72 | 7670 | static bool read_utf8_line(FileDecoder *dec, const char **linep, size_t *lenp) | |
| 73 | { | ||
| 74 | 7670 | const char *line = dec->ibuf + dec->ipos; | |
| 75 | 7670 | const char *nl = memchr(line, '\n', dec->isize - dec->ipos); | |
| 76 | 7670 | size_t len; | |
| 77 | |||
| 78 |
2/2✓ Branch 0 (2→3) taken 7634 times.
✓ Branch 1 (2→4) taken 36 times.
|
7670 | if (nl) { |
| 79 | 7634 | len = nl - line; | |
| 80 | 7634 | dec->ipos += len + 1; | |
| 81 | } else { | ||
| 82 | 36 | len = dec->isize - dec->ipos; | |
| 83 |
2/2✓ Branch 0 (4→5) taken 2 times.
✓ Branch 1 (4→7) taken 34 times.
|
36 | if (len == 0) { |
| 84 | return false; | ||
| 85 | } | ||
| 86 | 2 | dec->ipos += len; | |
| 87 | } | ||
| 88 | |||
| 89 | 7636 | *linep = line; | |
| 90 | 7636 | *lenp = len; | |
| 91 | 7636 | return true; | |
| 92 | } | ||
| 93 | |||
| 94 | 34 | static bool file_decoder_read_utf8(Buffer *buffer, const char *buf, size_t size) | |
| 95 | { | ||
| 96 |
1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→5) taken 34 times.
|
34 | if (unlikely(!encoding_is_utf8(buffer->encoding))) { |
| 97 | ✗ | errno = EINVAL; | |
| 98 | ✗ | return false; | |
| 99 | } | ||
| 100 | |||
| 101 | 34 | FileDecoder dec = { | |
| 102 | .ibuf = buf, | ||
| 103 | .isize = size, | ||
| 104 | }; | ||
| 105 | |||
| 106 | 34 | const char *line; | |
| 107 | 34 | size_t len; | |
| 108 | |||
| 109 |
2/2✓ Branch 0 (6→7) taken 28 times.
✓ Branch 1 (6→23) taken 6 times.
|
34 | if (!read_utf8_line(&dec, &line, &len)) { |
| 110 | return true; | ||
| 111 | } | ||
| 112 | |||
| 113 |
3/4✓ Branch 0 (7→8) taken 28 times.
✗ Branch 1 (7→10) not taken.
✓ Branch 2 (8→9) taken 1 times.
✓ Branch 3 (8→10) taken 27 times.
|
28 | if (len && line[len - 1] == '\r') { |
| 114 | 1 | buffer->crlf_newlines = true; | |
| 115 | 1 | len--; | |
| 116 | } | ||
| 117 | |||
| 118 | 28 | Block *blk = add_utf8_line(buffer, NULL, line, len); | |
| 119 | |||
| 120 |
2/2✓ Branch 0 (11→16) taken 1 times.
✓ Branch 1 (11→19) taken 27 times.
|
28 | if (unlikely(buffer->crlf_newlines)) { |
| 121 |
2/2✓ Branch 0 (17→12) taken 270 times.
✓ Branch 1 (17→21) taken 1 times.
|
271 | while (read_utf8_line(&dec, &line, &len)) { |
| 122 |
4/4✓ Branch 0 (12→13) taken 268 times.
✓ Branch 1 (12→15) taken 2 times.
✓ Branch 2 (13→14) taken 1 times.
✓ Branch 3 (13→15) taken 267 times.
|
270 | if (len && line[len - 1] == '\r') { |
| 123 | 1 | len--; | |
| 124 | } | ||
| 125 | 270 | blk = add_utf8_line(buffer, blk, line, len); | |
| 126 | } | ||
| 127 | } else { | ||
| 128 |
2/2✓ Branch 0 (20→18) taken 7338 times.
✓ Branch 1 (20→21) taken 27 times.
|
7365 | while (read_utf8_line(&dec, &line, &len)) { |
| 129 | 7338 | blk = add_utf8_line(buffer, blk, line, len); | |
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 |
1/2✓ Branch 0 (21→22) taken 28 times.
✗ Branch 1 (21→23) not taken.
|
28 | if (blk) { |
| 134 | 28 | add_block(buffer, blk); | |
| 135 | } | ||
| 136 | |||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | 1 | static size_t unix_to_dos ( | |
| 141 | FileEncoder *enc, | ||
| 142 | const char *buf, | ||
| 143 | size_t size | ||
| 144 | ) { | ||
| 145 | // TODO: Pass in Buffer::nl and make this size adjustment more conservative | ||
| 146 | // (it's resized to handle the worst possible case, despite the fact that we | ||
| 147 | // already have the number of newlines pre-computed) | ||
| 148 |
1/2✓ Branch 0 (2→3) taken 1 times.
✗ Branch 1 (2→8) not taken.
|
1 | if (enc->nsize < size * 2) { |
| 149 | 1 | enc->nsize = size * 2; | |
| 150 | 1 | enc->nbuf = xrealloc(enc->nbuf, enc->nsize); | |
| 151 | } | ||
| 152 | |||
| 153 | // TODO: Optimize this loop, by making use of memccpy(3) | ||
| 154 | size_t d = 0; | ||
| 155 |
2/2✓ Branch 0 (9→5) taken 21 times.
✓ Branch 1 (9→10) taken 1 times.
|
22 | for (size_t s = 0; s < size; s++) { |
| 156 | 21 | unsigned char ch = buf[s]; | |
| 157 |
2/2✓ Branch 0 (5→6) taken 3 times.
✓ Branch 1 (5→7) taken 18 times.
|
21 | if (ch == '\n') { |
| 158 | 3 | enc->nbuf[d++] = '\r'; | |
| 159 | } | ||
| 160 | 21 | enc->nbuf[d++] = ch; | |
| 161 | } | ||
| 162 | |||
| 163 | 1 | return d; | |
| 164 | } | ||
| 165 | |||
| 166 | #if ICONV_DISABLE == 1 // iconv not available; use basic, UTF-8 implementation: | ||
| 167 | |||
| 168 | bool conversion_supported_by_iconv ( | ||
| 169 | const char* UNUSED_ARG(from), | ||
| 170 | const char* UNUSED_ARG(to) | ||
| 171 | ) { | ||
| 172 | errno = EINVAL; | ||
| 173 | return false; | ||
| 174 | } | ||
| 175 | |||
| 176 | FileEncoder file_encoder(const char *encoding, bool crlf, int fd) | ||
| 177 | { | ||
| 178 | if (unlikely(!encoding_is_utf8(encoding))) { | ||
| 179 | BUG("unsupported conversion; should have been handled earlier"); | ||
| 180 | } | ||
| 181 | |||
| 182 | return (FileEncoder) { | ||
| 183 | .crlf = crlf, | ||
| 184 | .fd = fd, | ||
| 185 | }; | ||
| 186 | } | ||
| 187 | |||
| 188 | void file_encoder_free(FileEncoder *enc) | ||
| 189 | { | ||
| 190 | free(enc->nbuf); | ||
| 191 | } | ||
| 192 | |||
| 193 | ssize_t file_encoder_write(FileEncoder *enc, const char *buf, size_t n) | ||
| 194 | { | ||
| 195 | if (unlikely(enc->crlf)) { | ||
| 196 | n = unix_to_dos(enc, buf, n); | ||
| 197 | buf = enc->nbuf; | ||
| 198 | } | ||
| 199 | return xwrite_all(enc->fd, buf, n); | ||
| 200 | } | ||
| 201 | |||
| 202 | size_t file_encoder_get_nr_errors(const FileEncoder* UNUSED_ARG(enc)) | ||
| 203 | { | ||
| 204 | return 0; | ||
| 205 | } | ||
| 206 | |||
| 207 | bool file_decoder_read(Buffer *buffer, const char *buf, size_t size) | ||
| 208 | { | ||
| 209 | return file_decoder_read_utf8(buffer, buf, size); | ||
| 210 | } | ||
| 211 | |||
| 212 | #else // ICONV_DISABLE != 1; use full iconv implementation: | ||
| 213 | |||
| 214 | #include <iconv.h> | ||
| 215 | |||
| 216 | // UTF-8 encoding of U+00BF (inverted question mark; "¿") | ||
| 217 | #define REPLACEMENT "\xc2\xbf" | ||
| 218 | |||
// State for one iconv-based character set conversion
struct cconv {
    // iconv(3) conversion descriptor
    iconv_t cd;

    // Output buffer: `osize` bytes allocated, of which the first `opos`
    // hold converted data and the first `consumed` have already been
    // handed out to the caller
    char *obuf;
    size_t osize;
    size_t opos;
    size_t consumed;

    // Running count of conversion errors
    size_t errors;

    // Temporary input buffer, holding the `tcount` bytes of an incomplete
    // character left over at the end of the previous input chunk
    char tbuf[16];
    size_t tcount;

    // REPLACEMENT character in the target encoding (`rcount` bytes)
    char rbuf[4];
    size_t rcount;

    // Input character size in bytes, or zero for UTF-8
    size_t char_size;
};
| 238 | |||
| 239 | 1 | static struct cconv *create(iconv_t cd) | |
| 240 | { | ||
| 241 | 1 | struct cconv *c = xcalloc1(sizeof(*c)); | |
| 242 | 1 | c->cd = cd; | |
| 243 | 1 | c->osize = 8192; | |
| 244 | 1 | c->obuf = xmalloc(c->osize); | |
| 245 | 1 | return c; | |
| 246 | } | ||
| 247 | |||
// Thin wrapper around iconv(3) that accepts a const-qualified input
// pointer on every platform. Arguments and return value are exactly
// those of iconv(3).
static size_t iconv_wrapper (
    iconv_t cd,
    const char **restrict inbuf,
    size_t *restrict inbytesleft,
    char **restrict outbuf,
    size_t *restrict outbytesleft
) {
    // POSIX defines the second parameter of iconv(3) as "char **restrict"
    // but NetBSD declares it as "const char **restrict"
#ifdef __NetBSD__
    return iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
#else
    return iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
#endif
}
| 265 | |||
| 266 | ✗ | static void resize_obuf(struct cconv *c) | |
| 267 | { | ||
| 268 | ✗ | c->osize = xmul(2, c->osize); | |
| 269 | ✗ | c->obuf = xrealloc(c->obuf, c->osize); | |
| 270 | ✗ | } | |
| 271 | |||
| 272 | ✗ | static void add_replacement(struct cconv *c) | |
| 273 | { | ||
| 274 | ✗ | if (c->osize - c->opos < 4) { | |
| 275 | ✗ | resize_obuf(c); | |
| 276 | } | ||
| 277 | |||
| 278 | ✗ | memcpy(c->obuf + c->opos, c->rbuf, c->rcount); | |
| 279 | ✗ | c->opos += c->rcount; | |
| 280 | ✗ | } | |
| 281 | |||
| 282 | ✗ | static size_t handle_invalid(struct cconv *c, const char *buf, size_t count) | |
| 283 | { | ||
| 284 | ✗ | LOG_DEBUG("%zu %zu", c->char_size, count); | |
| 285 | ✗ | add_replacement(c); | |
| 286 | ✗ | if (c->char_size == 0) { | |
| 287 | // Converting from UTF-8 | ||
| 288 | ✗ | size_t idx = 0; | |
| 289 | ✗ | CodePoint u = u_get_char(buf, count, &idx); | |
| 290 | ✗ | LOG_DEBUG("U+%04" PRIX32, u); | |
| 291 | ✗ | return idx; | |
| 292 | } | ||
| 293 | ✗ | if (c->char_size > count) { | |
| 294 | // wtf | ||
| 295 | ✗ | return 1; | |
| 296 | } | ||
| 297 | return c->char_size; | ||
| 298 | } | ||
| 299 | |||
| 300 | 1 | static int xiconv(struct cconv *c, const char **ib, size_t *ic) | |
| 301 | { | ||
| 302 | 1 | while (1) { | |
| 303 | 1 | char *ob = c->obuf + c->opos; | |
| 304 | 1 | size_t oc = c->osize - c->opos; | |
| 305 | 1 | size_t rc = iconv_wrapper(c->cd, ib, ic, &ob, &oc); | |
| 306 | 1 | c->opos = ob - c->obuf; | |
| 307 |
1/2✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→12) taken 1 times.
|
1 | if (rc == (size_t)-1) { |
| 308 | ✗ | switch (errno) { | |
| 309 | ✗ | case EILSEQ: | |
| 310 | ✗ | c->errors++; | |
| 311 | // Reset | ||
| 312 | ✗ | iconv(c->cd, NULL, NULL, NULL, NULL); | |
| 313 | ✗ | return errno; | |
| 314 | case EINVAL: | ||
| 315 | return errno; | ||
| 316 | ✗ | case E2BIG: | |
| 317 | ✗ | resize_obuf(c); | |
| 318 | ✗ | continue; | |
| 319 | ✗ | default: | |
| 320 | − | BUG("iconv: %s", strerror(errno)); | |
| 321 | } | ||
| 322 | } else { | ||
| 323 | 1 | c->errors += rc; | |
| 324 | } | ||
| 325 | 1 | return 0; | |
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | ✗ | static size_t convert_incomplete(struct cconv *c, const char *input, size_t len) | |
| 330 | { | ||
| 331 | ✗ | size_t ipos = 0; | |
| 332 | ✗ | while (c->tcount < sizeof(c->tbuf) && ipos < len) { | |
| 333 | ✗ | c->tbuf[c->tcount++] = input[ipos++]; | |
| 334 | ✗ | const char *ib = c->tbuf; | |
| 335 | ✗ | size_t ic = c->tcount; | |
| 336 | ✗ | int rc = xiconv(c, &ib, &ic); | |
| 337 | ✗ | if (ic > 0) { | |
| 338 | ✗ | memmove(c->tbuf, ib, ic); | |
| 339 | } | ||
| 340 | ✗ | c->tcount = ic; | |
| 341 | ✗ | if (rc == EINVAL) { | |
| 342 | // Incomplete character at end of input buffer; try again | ||
| 343 | // with more input data | ||
| 344 | ✗ | continue; | |
| 345 | } | ||
| 346 | ✗ | if (rc == EILSEQ) { | |
| 347 | // Invalid multibyte sequence | ||
| 348 | ✗ | size_t skip = handle_invalid(c, c->tbuf, c->tcount); | |
| 349 | ✗ | c->tcount -= skip; | |
| 350 | ✗ | if (c->tcount > 0) { | |
| 351 | ✗ | LOG_DEBUG("tcount=%zu, skip=%zu", c->tcount, skip); | |
| 352 | ✗ | memmove(c->tbuf, c->tbuf + skip, c->tcount); | |
| 353 | ✗ | continue; | |
| 354 | } | ||
| 355 | ✗ | return ipos; | |
| 356 | } | ||
| 357 | ✗ | break; | |
| 358 | } | ||
| 359 | |||
| 360 | ✗ | LOG_DEBUG("%zu %zu", ipos, c->tcount); | |
| 361 | ✗ | return ipos; | |
| 362 | } | ||
| 363 | |||
| 364 | 1 | static void cconv_process(struct cconv *c, const char *input, size_t len) | |
| 365 | { | ||
| 366 |
1/2✗ Branch 0 (2→3) not taken.
✓ Branch 1 (2→4) taken 1 times.
|
1 | if (c->consumed > 0) { |
| 367 | ✗ | size_t fill = c->opos - c->consumed; | |
| 368 | ✗ | memmove(c->obuf, c->obuf + c->consumed, fill); | |
| 369 | ✗ | c->opos = fill; | |
| 370 | ✗ | c->consumed = 0; | |
| 371 | } | ||
| 372 | |||
| 373 |
1/2✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→7) taken 1 times.
|
1 | if (c->tcount > 0) { |
| 374 | ✗ | size_t ipos = convert_incomplete(c, input, len); | |
| 375 | ✗ | input += ipos; | |
| 376 | ✗ | len -= ipos; | |
| 377 | } | ||
| 378 | |||
| 379 | 1 | const char *ib = input; | |
| 380 |
2/2✓ Branch 0 (17→8) taken 1 times.
✓ Branch 1 (17→18) taken 1 times.
|
2 | for (size_t ic = len; ic > 0; ) { |
| 381 | 1 | int r = xiconv(c, &ib, &ic); | |
| 382 |
1/2✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→13) taken 1 times.
|
1 | if (r == EINVAL) { |
| 383 | // Incomplete character at end of input buffer | ||
| 384 | ✗ | if (ic < sizeof(c->tbuf)) { | |
| 385 | ✗ | memcpy(c->tbuf, ib, ic); | |
| 386 | ✗ | c->tcount = ic; | |
| 387 | } else { | ||
| 388 | // FIXME | ||
| 389 | ✗ | } | |
| 390 | ✗ | ic = 0; | |
| 391 | ✗ | continue; | |
| 392 | } | ||
| 393 |
1/2✗ Branch 0 (13→14) not taken.
✓ Branch 1 (13→16) taken 1 times.
|
1 | if (r == EILSEQ) { |
| 394 | // Invalid multibyte sequence | ||
| 395 | ✗ | size_t skip = handle_invalid(c, ib, ic); | |
| 396 | ✗ | ic -= skip; | |
| 397 | ✗ | ib += skip; | |
| 398 | ✗ | continue; | |
| 399 | } | ||
| 400 | } | ||
| 401 | 1 | } | |
| 402 | |||
| 403 | ✗ | static struct cconv *cconv_to_utf8(const char *encoding) | |
| 404 | { | ||
| 405 | ✗ | iconv_t cd = iconv_open("UTF-8", encoding); | |
| 406 | ✗ | if (cd == (iconv_t)-1) { | |
| 407 | return NULL; | ||
| 408 | } | ||
| 409 | |||
| 410 | ✗ | struct cconv *c = create(cd); | |
| 411 | ✗ | c->rcount = copyliteral(c->rbuf, REPLACEMENT); | |
| 412 | |||
| 413 | ✗ | if (str_has_prefix(encoding, "UTF-16")) { | |
| 414 | ✗ | c->char_size = 2; | |
| 415 | ✗ | } else if (str_has_prefix(encoding, "UTF-32")) { | |
| 416 | ✗ | c->char_size = 4; | |
| 417 | } else { | ||
| 418 | ✗ | c->char_size = 1; | |
| 419 | } | ||
| 420 | |||
| 421 | return c; | ||
| 422 | } | ||
| 423 | |||
| 424 | 1 | static void encode_replacement(struct cconv *c) | |
| 425 | { | ||
| 426 | 1 | static const char rep[] = REPLACEMENT; | |
| 427 | 1 | const char *ib = rep; | |
| 428 | 1 | char *ob = c->rbuf; | |
| 429 | 1 | size_t ic = STRLEN(REPLACEMENT); | |
| 430 | 1 | size_t oc = sizeof(c->rbuf); | |
| 431 | 1 | size_t rc = iconv_wrapper(c->cd, &ib, &ic, &ob, &oc); | |
| 432 | |||
| 433 |
1/2✓ Branch 0 (3→4) taken 1 times.
✗ Branch 1 (3→5) not taken.
|
1 | if (rc == (size_t)-1) { |
| 434 | 1 | c->rbuf[0] = '\xbf'; | |
| 435 | 1 | c->rcount = 1; | |
| 436 | } else { | ||
| 437 | ✗ | c->rcount = ob - c->rbuf; | |
| 438 | } | ||
| 439 | 1 | } | |
| 440 | |||
// Create a conversion state for encoding UTF-8 text as `encoding`, with
// the replacement character pre-converted to that encoding. Returns NULL
// if iconv_open(3) fails.
static struct cconv *cconv_from_utf8(const char *encoding)
{
    iconv_t cd = iconv_open(encoding, "UTF-8");
    const bool opened = (cd != (iconv_t)-1);
    if (!opened) {
        return NULL;
    }

    struct cconv *conv = create(cd);
    encode_replacement(conv);
    return conv;
}
| 451 | |||
| 452 | 1 | static void cconv_flush(struct cconv *c) | |
| 453 | { | ||
| 454 |
1/2✗ Branch 0 (2→3) not taken.
✓ Branch 1 (2→6) taken 1 times.
|
1 | if (c->tcount > 0) { |
| 455 | // Replace incomplete character at end of input buffer | ||
| 456 | ✗ | LOG_DEBUG("incomplete character at EOF"); | |
| 457 | ✗ | add_replacement(c); | |
| 458 | ✗ | c->tcount = 0; | |
| 459 | } | ||
| 460 | 1 | } | |
| 461 | |||
| 462 | ✗ | static char *cconv_consume_line(struct cconv *c, size_t *len) | |
| 463 | { | ||
| 464 | ✗ | char *line = c->obuf + c->consumed; | |
| 465 | ✗ | char *nl = memchr(line, '\n', c->opos - c->consumed); | |
| 466 | ✗ | if (!nl) { | |
| 467 | ✗ | *len = 0; | |
| 468 | ✗ | return NULL; | |
| 469 | } | ||
| 470 | |||
| 471 | ✗ | size_t n = nl - line + 1; | |
| 472 | ✗ | c->consumed += n; | |
| 473 | ✗ | *len = n; | |
| 474 | ✗ | return line; | |
| 475 | } | ||
| 476 | |||
| 477 | 1 | static char *cconv_consume_all(struct cconv *c, size_t *len) | |
| 478 | { | ||
| 479 | 1 | char *buf = c->obuf + c->consumed; | |
| 480 | 1 | *len = c->opos - c->consumed; | |
| 481 | 1 | c->consumed = c->opos; | |
| 482 | 1 | return buf; | |
| 483 | } | ||
| 484 | |||
| 485 | 1 | static void cconv_free(struct cconv *c) | |
| 486 | { | ||
| 487 | 1 | BUG_ON(!c); | |
| 488 | 1 | iconv_close(c->cd); | |
| 489 | 1 | free(c->obuf); | |
| 490 | 1 | free(c); | |
| 491 | 1 | } | |
| 492 | |||
// Check whether iconv can convert from encoding `from` to encoding `to`,
// by opening (and immediately closing) a conversion descriptor.
// Returns false with errno set to EINVAL if either name is an empty
// string, or false with errno as left by iconv_open(3) if the
// conversion can't be opened.
bool conversion_supported_by_iconv(const char *from, const char *to)
{
    const bool name_missing = (from[0] == '\0' || to[0] == '\0');
    if (unlikely(name_missing)) {
        errno = EINVAL;
        return false;
    }

    iconv_t cd = iconv_open(to, from);
    const bool supported = (cd != (iconv_t)-1);
    if (supported) {
        iconv_close(cd);
    }
    return supported;
}
| 508 | |||
| 509 | 21 | FileEncoder file_encoder(const char *encoding, bool crlf, int fd) | |
| 510 | { | ||
| 511 | 21 | struct cconv *cconv = NULL; | |
| 512 |
2/2✓ Branch 0 (3→4) taken 1 times.
✓ Branch 1 (3→7) taken 20 times.
|
21 | if (unlikely(!encoding_is_utf8(encoding))) { |
| 513 | 1 | cconv = cconv_from_utf8(encoding); | |
| 514 |
1/2✗ Branch 0 (5→6) not taken.
✓ Branch 1 (5→7) taken 1 times.
|
1 | if (!cconv) { |
| 515 | − | BUG("unsupported conversion; should have been handled earlier"); | |
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | 21 | return (FileEncoder) { | |
| 520 | .cconv = cconv, | ||
| 521 | .crlf = crlf, | ||
| 522 | .fd = fd, | ||
| 523 | }; | ||
| 524 | } | ||
| 525 | |||
| 526 | 21 | void file_encoder_free(FileEncoder *enc) | |
| 527 | { | ||
| 528 |
2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→4) taken 20 times.
|
21 | if (enc->cconv) { |
| 529 | 1 | cconv_free(enc->cconv); | |
| 530 | } | ||
| 531 | 21 | free(enc->nbuf); | |
| 532 | 21 | } | |
| 533 | |||
| 534 | // NOTE: buf must contain whole characters! | ||
| 535 | 21 | ssize_t file_encoder_write ( | |
| 536 | FileEncoder *enc, | ||
| 537 | const char *buf, | ||
| 538 | size_t size | ||
| 539 | ) { | ||
| 540 |
2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→5) taken 20 times.
|
21 | if (unlikely(enc->crlf)) { |
| 541 | 1 | size = unix_to_dos(enc, buf, size); | |
| 542 | 1 | buf = enc->nbuf; | |
| 543 | } | ||
| 544 |
2/2✓ Branch 0 (5→6) taken 1 times.
✓ Branch 1 (5→9) taken 20 times.
|
21 | if (unlikely(enc->cconv)) { |
| 545 | 1 | cconv_process(enc->cconv, buf, size); | |
| 546 | 1 | cconv_flush(enc->cconv); | |
| 547 | 1 | buf = cconv_consume_all(enc->cconv, &size); | |
| 548 | } | ||
| 549 | 21 | return xwrite_all(enc->fd, buf, size); | |
| 550 | } | ||
| 551 | |||
| 552 | 21 | size_t file_encoder_get_nr_errors(const FileEncoder *enc) | |
| 553 | { | ||
| 554 |
2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→4) taken 20 times.
|
21 | return enc->cconv ? enc->cconv->errors : 0; |
| 555 | } | ||
| 556 | |||
| 557 | ✗ | static bool fill(FileDecoder *dec) | |
| 558 | { | ||
| 559 | ✗ | if (dec->ipos == dec->isize) { | |
| 560 | return false; | ||
| 561 | } | ||
| 562 | |||
| 563 | // Smaller than cconv.obuf to make realloc less likely | ||
| 564 | ✗ | size_t max = 7 * 1024; | |
| 565 | |||
| 566 | ✗ | size_t icount = MIN(dec->isize - dec->ipos, max); | |
| 567 | ✗ | cconv_process(dec->cconv, dec->ibuf + dec->ipos, icount); | |
| 568 | ✗ | dec->ipos += icount; | |
| 569 | ✗ | if (dec->ipos == dec->isize) { | |
| 570 | // Must be flushed after all input has been fed | ||
| 571 | ✗ | cconv_flush(dec->cconv); | |
| 572 | } | ||
| 573 | return true; | ||
| 574 | } | ||
| 575 | |||
| 576 | ✗ | static bool decode_and_read_line(FileDecoder *dec, const char **linep, size_t *lenp) | |
| 577 | { | ||
| 578 | ✗ | char *line; | |
| 579 | ✗ | size_t len; | |
| 580 | ✗ | while (1) { | |
| 581 | ✗ | line = cconv_consume_line(dec->cconv, &len); | |
| 582 | ✗ | if (line || !fill(dec)) { | |
| 583 | break; | ||
| 584 | } | ||
| 585 | } | ||
| 586 | |||
| 587 | ✗ | if (line) { | |
| 588 | // Newline not wanted | ||
| 589 | ✗ | len--; | |
| 590 | } else { | ||
| 591 | ✗ | line = cconv_consume_all(dec->cconv, &len); | |
| 592 | ✗ | if (len == 0) { | |
| 593 | return false; | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 597 | ✗ | *linep = line; | |
| 598 | ✗ | *lenp = len; | |
| 599 | ✗ | return true; | |
| 600 | } | ||
| 601 | |||
| 602 | 34 | bool file_decoder_read(Buffer *buffer, const char *buf, size_t size) | |
| 603 | { | ||
| 604 |
1/2✓ Branch 0 (3→4) taken 34 times.
✗ Branch 1 (3→5) not taken.
|
34 | if (encoding_is_utf8(buffer->encoding)) { |
| 605 | 34 | return file_decoder_read_utf8(buffer, buf, size); | |
| 606 | } | ||
| 607 | |||
| 608 | ✗ | struct cconv *cconv = cconv_to_utf8(buffer->encoding); | |
| 609 | ✗ | if (!cconv) { | |
| 610 | return false; | ||
| 611 | } | ||
| 612 | |||
| 613 | ✗ | FileDecoder dec = { | |
| 614 | .ibuf = buf, | ||
| 615 | .isize = size, | ||
| 616 | .cconv = cconv, | ||
| 617 | }; | ||
| 618 | |||
| 619 | ✗ | const char *line; | |
| 620 | ✗ | size_t len; | |
| 621 | |||
| 622 | ✗ | if (decode_and_read_line(&dec, &line, &len)) { | |
| 623 | ✗ | if (len && line[len - 1] == '\r') { | |
| 624 | ✗ | buffer->crlf_newlines = true; | |
| 625 | ✗ | len--; | |
| 626 | } | ||
| 627 | ✗ | Block *blk = add_utf8_line(buffer, NULL, line, len); | |
| 628 | ✗ | while (decode_and_read_line(&dec, &line, &len)) { | |
| 629 | ✗ | if (buffer->crlf_newlines && len && line[len - 1] == '\r') { | |
| 630 | ✗ | len--; | |
| 631 | } | ||
| 632 | ✗ | blk = add_utf8_line(buffer, blk, line, len); | |
| 633 | } | ||
| 634 | ✗ | if (blk) { | |
| 635 | ✗ | add_block(buffer, blk); | |
| 636 | } | ||
| 637 | } | ||
| 638 | |||
| 639 | ✗ | cconv_free(cconv); | |
| 640 | ✗ | return true; | |
| 641 | } | ||
| 642 | |||
| 643 | #endif | ||
| 644 |