dte test coverage

Directory:	./
File:	src/convert.c
Date:	2025-02-14 16:55:22

	Exec	Total	Coverage
Lines:	151	289	52.2%
Functions:	20	28	71.4%
Branches:	56	138	40.6%

  
      Line
      Branch
      Exec
      Source
    
      #include <errno.h>
    
      #include <inttypes.h>
    
      #include <stdlib.h>
    
      #include <string.h>
    
      #include "convert.h"
    
      #include "block.h"
    
      #include "buildvar-iconv.h"
    
      #include "encoding.h"
    
      #include "util/debug.h"
    
      #include "util/intern.h"
    
      #include "util/list.h"
    
      #include "util/log.h"
    
      #include "util/str-util.h"
    
      #include "util/utf8.h"
    
      #include "util/xmalloc.h"
    
      #include "util/xreadwrite.h"
    
      enum {
    
          // If any line exceeds this length when reading a file, syntax
    
          // highlighting will be automatically disabled
    
          SYN_HIGHLIGHT_MAX_LINE_LEN = 512u << 10, // 512KiB
    
      };
    
      typedef struct {
    
          const unsigned char *ibuf;
    
          ssize_t ipos;
    
          ssize_t isize;
    
          struct cconv *cconv;
    
      } FileDecoder;
    
      56
      static void add_block(Buffer *buffer, Block *blk)
    
      {
    
      56
          buffer->nl += blk->nl;
    
      56
          list_insert_before(&blk->node, &buffer->blocks);
    
      56
      }
    
      7316
      static Block *add_utf8_line (
    
          Buffer *buffer,
    
          Block *blk,
    
          const unsigned char *line,
    
          size_t len
    
      ) {
    
      7316
          size_t size = len + 1;
    
        2/2✓ Branch 0 (2→3) taken 7288 times.
✓ Branch 1 (2→6) taken 28 times.

      7316
          if (blk) {
    
      7288
              size_t avail = blk->alloc - blk->size;
    
        2/2✓ Branch 0 (3→4) taken 7260 times.
✓ Branch 1 (3→5) taken 28 times.

      7288
              if (size <= avail) {
    
      7260
                  goto copy;
    
              }
    
      28
              add_block(buffer, blk);
    
          }
    
      56
          size = MAX(size, 8192);
    
      56
          blk = block_new(size);
    
      7316
      copy:
    
        1/4✗ Branch 0 (7→8) not taken.
✓ Branch 1 (7→11) taken 7316 times.
✗ Branch 2 (8→9) not taken.
✗ Branch 3 (8→11) not taken.

      7316
          if (unlikely(len > SYN_HIGHLIGHT_MAX_LINE_LEN && buffer->options.syntax)) {
    
              // TODO: Make the limit configurable and add documentation
    
              // TODO: Pass in an ErrorBuffer* and use error_msg() instead of LOG_NOTICE()
    
      ✗
              LOG_NOTICE (
    
                  "line length (%zu) exceeded limit (%ju); disabling syntax highlighting",
    
                  len, (uintmax_t)SYN_HIGHLIGHT_MAX_LINE_LEN
    
              );
    
      ✗
              buffer->options.syntax = false;
    
          }
    
      7316
          memcpy(blk->data + blk->size, line, len);
    
      7316
          blk->size += len;
    
      7316
          blk->data[blk->size++] = '\n';
    
      7316
          blk->nl++;
    
      7316
          return blk;
    
      }
    
      7347
      static bool read_utf8_line(FileDecoder *dec, const char **linep, size_t *lenp)
    
      {
    
      7347
          const char *line = dec->ibuf + dec->ipos;
    
      7347
          const char *nl = memchr(line, '\n', dec->isize - dec->ipos);
    
      7347
          size_t len;
    
        2/2✓ Branch 0 (2→3) taken 7314 times.
✓ Branch 1 (2→4) taken 33 times.

      7347
          if (nl) {
    
      7314
              len = nl - line;
    
      7314
              dec->ipos += len + 1;
    
          } else {
    
      33
              len = dec->isize - dec->ipos;
    
        2/2✓ Branch 0 (4→5) taken 2 times.
✓ Branch 1 (4→7) taken 31 times.

      33
              if (len == 0) {
    
                  return false;
    
              }
    
      2
              dec->ipos += len;
    
          }
    
      7316
          *linep = line;
    
      7316
          *lenp = len;
    
      7316
          return true;
    
      }
    
      31
      static bool file_decoder_read_utf8(Buffer *buffer, const unsigned char *buf, size_t size)
    
      {
    
        1/2✗ Branch 0 (3→4) not taken.
✓ Branch 1 (3→5) taken 31 times.

      31
          if (unlikely(!encoding_is_utf8(buffer->encoding))) {
    
      ✗
              errno = EINVAL;
    
      ✗
              return false;
    
          }
    
      31
          FileDecoder dec = {
    
              .ibuf = buf,
    
              .isize = size,
    
          };
    
      31
          const char *line;
    
      31
          size_t len;
    
        2/2✓ Branch 0 (6→7) taken 28 times.
✓ Branch 1 (6→23) taken 3 times.

      31
          if (!read_utf8_line(&dec, &line, &len)) {
    
              return true;
    
          }
    
        3/4✓ Branch 0 (7→8) taken 28 times.
✗ Branch 1 (7→10) not taken.
✓ Branch 2 (8→9) taken 1 times.
✓ Branch 3 (8→10) taken 27 times.

      28
          if (len && line[len - 1] == '\r') {
    
      1
              buffer->crlf_newlines = true;
    
      1
              len--;
    
          }
    
      28
          Block *blk = add_utf8_line(buffer, NULL, line, len);
    
        2/2✓ Branch 0 (11→16) taken 1 times.
✓ Branch 1 (11→19) taken 27 times.

      28
          if (unlikely(buffer->crlf_newlines)) {
    
        2/2✓ Branch 0 (17→12) taken 270 times.
✓ Branch 1 (17→21) taken 1 times.

      271
              while (read_utf8_line(&dec, &line, &len)) {
    
        4/4✓ Branch 0 (12→13) taken 268 times.
✓ Branch 1 (12→15) taken 2 times.
✓ Branch 2 (13→14) taken 1 times.
✓ Branch 3 (13→15) taken 267 times.

      270
                  if (len && line[len - 1] == '\r') {
    
      1
                      len--;
    
                  }
    
      270
                  blk = add_utf8_line(buffer, blk, line, len);
    
              }
    
          } else {
    
        2/2✓ Branch 0 (20→18) taken 7018 times.
✓ Branch 1 (20→21) taken 27 times.

      7045
              while (read_utf8_line(&dec, &line, &len)) {
    
      7018
                  blk = add_utf8_line(buffer, blk, line, len);
    
              }
    
          }
    
        1/2✓ Branch 0 (21→22) taken 28 times.
✗ Branch 1 (21→23) not taken.

      28
          if (blk) {
    
      28
              add_block(buffer, blk);
    
          }
    
          return true;
    
      }
    
      1
      static size_t unix_to_dos (
    
          FileEncoder *enc,
    
          const unsigned char *buf,
    
          size_t size
    
      ) {
    
          // TODO: Pass in Buffer::nl and make this size adjustment more conservative
    
          // (it's resized to handle the worst possible case, despite the fact that we
    
          // already have the number of newlines pre-computed)
    
        1/2✓ Branch 0 (2→3) taken 1 times.
✗ Branch 1 (2→8) not taken.

      1
          if (enc->nsize < size * 2) {
    
      1
              enc->nsize = size * 2;
    
      1
              enc->nbuf = xrealloc(enc->nbuf, enc->nsize);
    
          }
    
          // TODO: Optimize this loop, by making use of memccpy(3)
    
          size_t d = 0;
    
        2/2✓ Branch 0 (9→5) taken 21 times.
✓ Branch 1 (9→10) taken 1 times.

      22
          for (size_t s = 0; s < size; s++) {
    
      21
              unsigned char ch = buf[s];
    
        2/2✓ Branch 0 (5→6) taken 3 times.
✓ Branch 1 (5→7) taken 18 times.

      21
              if (ch == '\n') {
    
      3
                  enc->nbuf[d++] = '\r';
    
              }
    
      21
              enc->nbuf[d++] = ch;
    
          }
    
      1
          return d;
    
      }
    
      #if ICONV_DISABLE == 1 // iconv not available; use basic, UTF-8 implementation:
    
      bool conversion_supported_by_iconv (
    
          const char* UNUSED_ARG(from),
    
          const char* UNUSED_ARG(to)
    
      ) {
    
          errno = EINVAL;
    
          return false;
    
      }
    
      FileEncoder file_encoder(const char *encoding, bool crlf, int fd)
    
      {
    
          if (unlikely(!encoding_is_utf8(encoding))) {
    
              BUG("unsupported conversion; should have been handled earlier");
    
          }
    
          return (FileEncoder) {
    
              .crlf = crlf,
    
              .fd = fd,
    
          };
    
      }
    
      void file_encoder_free(FileEncoder *enc)
    
      {
    
          free(enc->nbuf);
    
      }
    
      ssize_t file_encoder_write(FileEncoder *enc, const unsigned char *buf, size_t n)
    
      {
    
          if (unlikely(enc->crlf)) {
    
              n = unix_to_dos(enc, buf, n);
    
              buf = enc->nbuf;
    
          }
    
          return xwrite_all(enc->fd, buf, n);
    
      }
    
      size_t file_encoder_get_nr_errors(const FileEncoder* UNUSED_ARG(enc))
    
      {
    
          return 0;
    
      }
    
      bool file_decoder_read(Buffer *buffer, const unsigned char *buf, size_t size)
    
      {
    
          return file_decoder_read_utf8(buffer, buf, size);
    
      }
    
      #else // ICONV_DISABLE != 1; use full iconv implementation:
    
      #include <iconv.h>
    
      // UTF-8 encoding of U+00BF (inverted question mark; "¿")
    
      #define REPLACEMENT "\xc2\xbf"
    
      struct cconv {
    
          iconv_t cd;
    
          char *obuf;
    
          size_t osize;
    
          size_t opos;
    
          size_t consumed;
    
          size_t errors;
    
          // Temporary input buffer
    
          char tbuf[16];
    
          size_t tcount;
    
          // REPLACEMENT character, in target encoding
    
          char rbuf[4];
    
          size_t rcount;
    
          // Input character size in bytes, or zero for UTF-8
    
          size_t char_size;
    
      };
    
      1
      static struct cconv *create(iconv_t cd)
    
      {
    
      1
          struct cconv *c = xnew0(struct cconv, 1);
    
      1
          c->cd = cd;
    
      1
          c->osize = 8192;
    
      1
          c->obuf = xmalloc(c->osize);
    
      1
          return c;
    
      }
    
      2
      static size_t iconv_wrapper (
    
          iconv_t cd,
    
          const char **restrict inbuf,
    
          size_t *restrict inbytesleft,
    
          char **restrict outbuf,
    
          size_t *restrict outbytesleft
    
      ) {
    
          // POSIX defines the second parameter of iconv(3) as "char **restrict"
    
          // but NetBSD declares it as "const char **restrict"
    
      #ifdef __NetBSD__
    
          const char **restrict in = inbuf;
    
      #else
    
      2
          char **restrict in = (char **restrict)inbuf;
    
      #endif
    
      2
          return iconv(cd, in, inbytesleft, outbuf, outbytesleft);
    
      }
    
      ✗
      static void resize_obuf(struct cconv *c)
    
      {
    
      ✗
          c->osize = xmul(2, c->osize);
    
      ✗
          c->obuf = xrealloc(c->obuf, c->osize);
    
      ✗
      }
    
      ✗
      static void add_replacement(struct cconv *c)
    
      {
    
      ✗
          if (c->osize - c->opos < 4) {
    
      ✗
              resize_obuf(c);
    
          }
    
      ✗
          memcpy(c->obuf + c->opos, c->rbuf, c->rcount);
    
      ✗
          c->opos += c->rcount;
    
      ✗
      }
    
      ✗
      static size_t handle_invalid(struct cconv *c, const char *buf, size_t count)
    
      {
    
      ✗
          LOG_DEBUG("%zu %zu", c->char_size, count);
    
      ✗
          add_replacement(c);
    
      ✗
          if (c->char_size == 0) {
    
              // Converting from UTF-8
    
      ✗
              size_t idx = 0;
    
      ✗
              CodePoint u = u_get_char(buf, count, &idx);
    
      ✗
              LOG_DEBUG("U+%04" PRIX32, u);
    
      ✗
              return idx;
    
          }
    
      ✗
          if (c->char_size > count) {
    
              // wtf
    
      ✗
              return 1;
    
          }
    
          return c->char_size;
    
      }
    
      1
      static int xiconv(struct cconv *c, const char **ib, size_t *ic)
    
      {
    
      1
          while (1) {
    
      1
              char *ob = c->obuf + c->opos;
    
      1
              size_t oc = c->osize - c->opos;
    
      1
              size_t rc = iconv_wrapper(c->cd, ib, ic, &ob, &oc);
    
      1
              c->opos = ob - c->obuf;
    
        1/2✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→12) taken 1 times.

      1
              if (rc == (size_t)-1) {
    
      ✗
                  switch (errno) {
    
      ✗
                  case EILSEQ:
    
      ✗
                      c->errors++;
    
                      // Reset
    
      ✗
                      iconv(c->cd, NULL, NULL, NULL, NULL);
    
      ✗
                      return errno;
    
                  case EINVAL:
    
                      return errno;
    
      ✗
                  case E2BIG:
    
      ✗
                      resize_obuf(c);
    
      ✗
                      continue;
    
      ✗
                  default:
    
      −
                      BUG("iconv: %s", strerror(errno));
    
                  }
    
              } else {
    
      1
                  c->errors += rc;
    
              }
    
      1
              return 0;
    
          }
    
      }
    
      ✗
      static size_t convert_incomplete(struct cconv *c, const char *input, size_t len)
    
      {
    
      ✗
          size_t ipos = 0;
    
      ✗
          while (c->tcount < sizeof(c->tbuf) && ipos < len) {
    
      ✗
              c->tbuf[c->tcount++] = input[ipos++];
    
      ✗
              const char *ib = c->tbuf;
    
      ✗
              size_t ic = c->tcount;
    
      ✗
              int rc = xiconv(c, &ib, &ic);
    
      ✗
              if (ic > 0) {
    
      ✗
                  memmove(c->tbuf, ib, ic);
    
              }
    
      ✗
              c->tcount = ic;
    
      ✗
              if (rc == EINVAL) {
    
                  // Incomplete character at end of input buffer; try again
    
                  // with more input data
    
      ✗
                  continue;
    
              }
    
      ✗
              if (rc == EILSEQ) {
    
                  // Invalid multibyte sequence
    
      ✗
                  size_t skip = handle_invalid(c, c->tbuf, c->tcount);
    
      ✗
                  c->tcount -= skip;
    
      ✗
                  if (c->tcount > 0) {
    
      ✗
                      LOG_DEBUG("tcount=%zu, skip=%zu", c->tcount, skip);
    
      ✗
                      memmove(c->tbuf, c->tbuf + skip, c->tcount);
    
      ✗
                      continue;
    
                  }
    
      ✗
                  return ipos;
    
              }
    
      ✗
              break;
    
          }
    
      ✗
          LOG_DEBUG("%zu %zu", ipos, c->tcount);
    
      ✗
          return ipos;
    
      }
    
      1
      static void cconv_process(struct cconv *c, const char *input, size_t len)
    
      {
    
        1/2✗ Branch 0 (2→3) not taken.
✓ Branch 1 (2→4) taken 1 times.

      1
          if (c->consumed > 0) {
    
      ✗
              size_t fill = c->opos - c->consumed;
    
      ✗
              memmove(c->obuf, c->obuf + c->consumed, fill);
    
      ✗
              c->opos = fill;
    
      ✗
              c->consumed = 0;
    
          }
    
        1/2✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→7) taken 1 times.

      1
          if (c->tcount > 0) {
    
      ✗
              size_t ipos = convert_incomplete(c, input, len);
    
      ✗
              input += ipos;
    
      ✗
              len -= ipos;
    
          }
    
      1
          const char *ib = input;
    
        2/2✓ Branch 0 (17→8) taken 1 times.
✓ Branch 1 (17→18) taken 1 times.

      2
          for (size_t ic = len; ic > 0; ) {
    
      1
              int r = xiconv(c, &ib, &ic);
    
        1/2✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→13) taken 1 times.

      1
              if (r == EINVAL) {
    
                  // Incomplete character at end of input buffer
    
      ✗
                  if (ic < sizeof(c->tbuf)) {
    
      ✗
                      memcpy(c->tbuf, ib, ic);
    
      ✗
                      c->tcount = ic;
    
                  } else {
    
                      // FIXME
    
      ✗
                  }
    
      ✗
                  ic = 0;
    
      ✗
                  continue;
    
              }
    
        1/2✗ Branch 0 (13→14) not taken.
✓ Branch 1 (13→16) taken 1 times.

      1
              if (r == EILSEQ) {
    
                  // Invalid multibyte sequence
    
      ✗
                  size_t skip = handle_invalid(c, ib, ic);
    
      ✗
                  ic -= skip;
    
      ✗
                  ib += skip;
    
      ✗
                  continue;
    
              }
    
          }
    
      1
      }
    
      ✗
      static struct cconv *cconv_to_utf8(const char *encoding)
    
      {
    
      ✗
          iconv_t cd = iconv_open("UTF-8", encoding);
    
      ✗
          if (cd == (iconv_t)-1) {
    
              return NULL;
    
          }
    
      ✗
          struct cconv *c = create(cd);
    
      ✗
          c->rcount = copyliteral(c->rbuf, REPLACEMENT);
    
      ✗
          if (str_has_prefix(encoding, "UTF-16")) {
    
      ✗
              c->char_size = 2;
    
      ✗
          } else if (str_has_prefix(encoding, "UTF-32")) {
    
      ✗
              c->char_size = 4;
    
          } else {
    
      ✗
              c->char_size = 1;
    
          }
    
          return c;
    
      }
    
      1
      static void encode_replacement(struct cconv *c)
    
      {
    
      1
          static const unsigned char rep[] = REPLACEMENT;
    
      1
          const char *ib = rep;
    
      1
          char *ob = c->rbuf;
    
      1
          size_t ic = STRLEN(REPLACEMENT);
    
      1
          size_t oc = sizeof(c->rbuf);
    
      1
          size_t rc = iconv_wrapper(c->cd, &ib, &ic, &ob, &oc);
    
        1/2✓ Branch 0 (3→4) taken 1 times.
✗ Branch 1 (3→5) not taken.

      1
          if (rc == (size_t)-1) {
    
      1
              c->rbuf[0] = '\xbf';
    
      1
              c->rcount = 1;
    
          } else {
    
      ✗
              c->rcount = ob - c->rbuf;
    
          }
    
      1
      }
    
      1
      static struct cconv *cconv_from_utf8(const char *encoding)
    
      {
    
      1
          iconv_t cd = iconv_open(encoding, "UTF-8");
    
        1/2✓ Branch 0 (3→4) taken 1 times.
✗ Branch 1 (3→7) not taken.

      1
          if (cd == (iconv_t)-1) {
    
              return NULL;
    
          }
    
      1
          struct cconv *c = create(cd);
    
      1
          encode_replacement(c);
    
      1
          return c;
    
      }
    
      1
      static void cconv_flush(struct cconv *c)
    
      {
    
        1/2✗ Branch 0 (2→3) not taken.
✓ Branch 1 (2→6) taken 1 times.

      1
          if (c->tcount > 0) {
    
              // Replace incomplete character at end of input buffer
    
      ✗
              LOG_DEBUG("incomplete character at EOF");
    
      ✗
              add_replacement(c);
    
      ✗
              c->tcount = 0;
    
          }
    
      1
      }
    
      ✗
      static char *cconv_consume_line(struct cconv *c, size_t *len)
    
      {
    
      ✗
          char *line = c->obuf + c->consumed;
    
      ✗
          char *nl = memchr(line, '\n', c->opos - c->consumed);
    
      ✗
          if (!nl) {
    
      ✗
              *len = 0;
    
      ✗
              return NULL;
    
          }
    
      ✗
          size_t n = nl - line + 1;
    
      ✗
          c->consumed += n;
    
      ✗
          *len = n;
    
      ✗
          return line;
    
      }
    
      1
      static char *cconv_consume_all(struct cconv *c, size_t *len)
    
      {
    
      1
          char *buf = c->obuf + c->consumed;
    
      1
          *len = c->opos - c->consumed;
    
      1
          c->consumed = c->opos;
    
      1
          return buf;
    
      }
    
      1
      static void cconv_free(struct cconv *c)
    
      {
    
      1
          BUG_ON(!c);
    
      1
          iconv_close(c->cd);
    
      1
          free(c->obuf);
    
      1
          free(c);
    
      1
      }
    
      2
      bool conversion_supported_by_iconv(const char *from, const char *to)
    
      {
    
        2/4✓ Branch 0 (2→3) taken 2 times.
✗ Branch 1 (2→4) not taken.
✗ Branch 2 (3→4) not taken.
✓ Branch 3 (3→5) taken 2 times.

      2
          if (unlikely(from[0] == '\0' || to[0] == '\0')) {
    
      ✗
              errno = EINVAL;
    
      ✗
              return false;
    
          }
    
      2
          iconv_t cd = iconv_open(to, from);
    
        1/2✓ Branch 0 (6→7) taken 2 times.
✗ Branch 1 (6→9) not taken.

      2
          if (cd == (iconv_t)-1) {
    
              return false;
    
          }
    
      2
          iconv_close(cd);
    
      2
          return true;
    
      }
    
      21
      FileEncoder file_encoder(const char *encoding, bool crlf, int fd)
    
      {
    
      21
          struct cconv *cconv = NULL;
    
        2/2✓ Branch 0 (3→4) taken 1 times.
✓ Branch 1 (3→7) taken 20 times.

      21
          if (unlikely(!encoding_is_utf8(encoding))) {
    
      1
              cconv = cconv_from_utf8(encoding);
    
        1/2✗ Branch 0 (5→6) not taken.
✓ Branch 1 (5→7) taken 1 times.

      1
              if (!cconv) {
    
      −
                  BUG("unsupported conversion; should have been handled earlier");
    
              }
    
          }
    
      21
          return (FileEncoder) {
    
              .cconv = cconv,
    
              .crlf = crlf,
    
              .fd = fd,
    
          };
    
      }
    
      21
      void file_encoder_free(FileEncoder *enc)
    
      {
    
        2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→4) taken 20 times.

      21
          if (enc->cconv) {
    
      1
              cconv_free(enc->cconv);
    
          }
    
      21
          free(enc->nbuf);
    
      21
      }
    
      // NOTE: buf must contain whole characters!
    
      21
      ssize_t file_encoder_write (
    
          FileEncoder *enc,
    
          const unsigned char *buf,
    
          size_t size
    
      ) {
    
        2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→5) taken 20 times.

      21
          if (unlikely(enc->crlf)) {
    
      1
              size = unix_to_dos(enc, buf, size);
    
      1
              buf = enc->nbuf;
    
          }
    
        2/2✓ Branch 0 (5→6) taken 1 times.
✓ Branch 1 (5→9) taken 20 times.

      21
          if (unlikely(enc->cconv)) {
    
      1
              cconv_process(enc->cconv, buf, size);
    
      1
              cconv_flush(enc->cconv);
    
      1
              buf = cconv_consume_all(enc->cconv, &size);
    
          }
    
      21
          return xwrite_all(enc->fd, buf, size);
    
      }
    
      21
      size_t file_encoder_get_nr_errors(const FileEncoder *enc)
    
      {
    
        2/2✓ Branch 0 (2→3) taken 1 times.
✓ Branch 1 (2→4) taken 20 times.

      21
          return enc->cconv ? enc->cconv->errors : 0;
    
      }
    
      ✗
      static bool fill(FileDecoder *dec)
    
      {
    
      ✗
          if (dec->ipos == dec->isize) {
    
              return false;
    
          }
    
          // Smaller than cconv.obuf to make realloc less likely
    
      ✗
          size_t max = 7 * 1024;
    
      ✗
          size_t icount = MIN(dec->isize - dec->ipos, max);
    
      ✗
          cconv_process(dec->cconv, dec->ibuf + dec->ipos, icount);
    
      ✗
          dec->ipos += icount;
    
      ✗
          if (dec->ipos == dec->isize) {
    
              // Must be flushed after all input has been fed
    
      ✗
              cconv_flush(dec->cconv);
    
          }
    
          return true;
    
      }
    
      ✗
      static bool decode_and_read_line(FileDecoder *dec, const char **linep, size_t *lenp)
    
      {
    
      ✗
          char *line;
    
      ✗
          size_t len;
    
      ✗
          while (1) {
    
      ✗
              line = cconv_consume_line(dec->cconv, &len);
    
      ✗
              if (line || !fill(dec)) {
    
                  break;
    
              }
    
          }
    
      ✗
          if (line) {
    
              // Newline not wanted
    
      ✗
              len--;
    
          } else {
    
      ✗
              line = cconv_consume_all(dec->cconv, &len);
    
      ✗
              if (len == 0) {
    
                  return false;
    
              }
    
          }
    
      ✗
          *linep = line;
    
      ✗
          *lenp = len;
    
      ✗
          return true;
    
      }
    
      31
      bool file_decoder_read(Buffer *buffer, const unsigned char *buf, size_t size)
    
      {
    
        1/2✓ Branch 0 (3→4) taken 31 times.
✗ Branch 1 (3→5) not taken.

      31
          if (encoding_is_utf8(buffer->encoding)) {
    
      31
              return file_decoder_read_utf8(buffer, buf, size);
    
          }
    
      ✗
          struct cconv *cconv = cconv_to_utf8(buffer->encoding);
    
      ✗
          if (!cconv) {
    
              return false;
    
          }
    
      ✗
          FileDecoder dec = {
    
              .ibuf = buf,
    
              .isize = size,
    
              .cconv = cconv,
    
          };
    
      ✗
          const char *line;
    
      ✗
          size_t len;
    
      ✗
          if (decode_and_read_line(&dec, &line, &len)) {
    
      ✗
              if (len && line[len - 1] == '\r') {
    
      ✗
                  buffer->crlf_newlines = true;
    
      ✗
                  len--;
    
              }
    
      ✗
              Block *blk = add_utf8_line(buffer, NULL, line, len);
    
      ✗
              while (decode_and_read_line(&dec, &line, &len)) {
    
      ✗
                  if (buffer->crlf_newlines && len && line[len - 1] == '\r') {
    
      ✗
                      len--;
    
                  }
    
      ✗
                  blk = add_utf8_line(buffer, blk, line, len);
    
              }
    
      ✗
              if (blk) {
    
      ✗
                  add_block(buffer, blk);
    
              }
    
          }
    
      ✗
          cconv_free(cconv);
    
      ✗
          return true;
    
      }
    
      #endif

Line	Branch	Exec	Source
1			#include <errno.h>
2			#include <inttypes.h>
3			#include <stdlib.h>
4			#include <string.h>
5			#include "convert.h"
6			#include "block.h"
7			#include "buildvar-iconv.h"
8			#include "encoding.h"
9			#include "util/debug.h"
10			#include "util/intern.h"
11			#include "util/list.h"
12			#include "util/log.h"
13			#include "util/str-util.h"
14			#include "util/utf8.h"
15			#include "util/xmalloc.h"
16			#include "util/xreadwrite.h"
17
18			enum {
19			// If any line exceeds this length when reading a file, syntax
20			// highlighting will be automatically disabled
21			SYN_HIGHLIGHT_MAX_LINE_LEN = 512u << 10, // 512KiB
22			};
23
24			typedef struct {
25			const unsigned char *ibuf;
26			ssize_t ipos;
27			ssize_t isize;
28			struct cconv *cconv;
29			} FileDecoder;
30
31		56	static void add_block(Buffer *buffer, Block *blk)
32			{
33		56	buffer->nl += blk->nl;
34		56	list_insert_before(&blk->node, &buffer->blocks);
35		56	}
36
37		7316	static Block *add_utf8_line (
38			Buffer *buffer,
39			Block *blk,
40			const unsigned char *line,
41			size_t len
42			) {
43		7316	size_t size = len + 1;
44	2/2 ✓ Branch 0 (2→3) taken 7288 times. ✓ Branch 1 (2→6) taken 28 times.	7316	if (blk) {
45		7288	size_t avail = blk->alloc - blk->size;
46	2/2 ✓ Branch 0 (3→4) taken 7260 times. ✓ Branch 1 (3→5) taken 28 times.	7288	if (size <= avail) {
47		7260	goto copy;
48			}
49		28	add_block(buffer, blk);
50			}
51		56	size = MAX(size, 8192);
52		56	blk = block_new(size);
53
54		7316	copy:
55	1/4 ✗ Branch 0 (7→8) not taken. ✓ Branch 1 (7→11) taken 7316 times. ✗ Branch 2 (8→9) not taken. ✗ Branch 3 (8→11) not taken.	7316	if (unlikely(len > SYN_HIGHLIGHT_MAX_LINE_LEN && buffer->options.syntax)) {
56			// TODO: Make the limit configurable and add documentation
57			// TODO: Pass in an ErrorBuffer* and use error_msg() instead of LOG_NOTICE()
58		✗	LOG_NOTICE (
59			"line length (%zu) exceeded limit (%ju); disabling syntax highlighting",
60			len, (uintmax_t)SYN_HIGHLIGHT_MAX_LINE_LEN
61			);
62		✗	buffer->options.syntax = false;
63			}
64
65		7316	memcpy(blk->data + blk->size, line, len);
66		7316	blk->size += len;
67		7316	blk->data[blk->size++] = '\n';
68		7316	blk->nl++;
69		7316	return blk;
70			}
71
72		7347	static bool read_utf8_line(FileDecoder *dec, const char **linep, size_t *lenp)
73			{
74		7347	const char *line = dec->ibuf + dec->ipos;
75		7347	const char *nl = memchr(line, '\n', dec->isize - dec->ipos);
76		7347	size_t len;
77
78	2/2 ✓ Branch 0 (2→3) taken 7314 times. ✓ Branch 1 (2→4) taken 33 times.	7347	if (nl) {
79		7314	len = nl - line;
80		7314	dec->ipos += len + 1;
81			} else {
82		33	len = dec->isize - dec->ipos;
83	2/2 ✓ Branch 0 (4→5) taken 2 times. ✓ Branch 1 (4→7) taken 31 times.	33	if (len == 0) {
84			return false;
85			}
86		2	dec->ipos += len;
87			}
88
89		7316	*linep = line;
90		7316	*lenp = len;
91		7316	return true;
92			}
93
94		31	static bool file_decoder_read_utf8(Buffer *buffer, const unsigned char *buf, size_t size)
95			{
96	1/2 ✗ Branch 0 (3→4) not taken. ✓ Branch 1 (3→5) taken 31 times.	31	if (unlikely(!encoding_is_utf8(buffer->encoding))) {
97		✗	errno = EINVAL;
98		✗	return false;
99			}
100
101		31	FileDecoder dec = {
102			.ibuf = buf,
103			.isize = size,
104			};
105
106		31	const char *line;
107		31	size_t len;
108
109	2/2 ✓ Branch 0 (6→7) taken 28 times. ✓ Branch 1 (6→23) taken 3 times.	31	if (!read_utf8_line(&dec, &line, &len)) {
110			return true;
111			}
112
113	3/4 ✓ Branch 0 (7→8) taken 28 times. ✗ Branch 1 (7→10) not taken. ✓ Branch 2 (8→9) taken 1 times. ✓ Branch 3 (8→10) taken 27 times.	28	if (len && line[len - 1] == '\r') {
114		1	buffer->crlf_newlines = true;
115		1	len--;
116			}
117
118		28	Block *blk = add_utf8_line(buffer, NULL, line, len);
119
120	2/2 ✓ Branch 0 (11→16) taken 1 times. ✓ Branch 1 (11→19) taken 27 times.	28	if (unlikely(buffer->crlf_newlines)) {
121	2/2 ✓ Branch 0 (17→12) taken 270 times. ✓ Branch 1 (17→21) taken 1 times.	271	while (read_utf8_line(&dec, &line, &len)) {
122	4/4 ✓ Branch 0 (12→13) taken 268 times. ✓ Branch 1 (12→15) taken 2 times. ✓ Branch 2 (13→14) taken 1 times. ✓ Branch 3 (13→15) taken 267 times.	270	if (len && line[len - 1] == '\r') {
123		1	len--;
124			}
125		270	blk = add_utf8_line(buffer, blk, line, len);
126			}
127			} else {
128	2/2 ✓ Branch 0 (20→18) taken 7018 times. ✓ Branch 1 (20→21) taken 27 times.	7045	while (read_utf8_line(&dec, &line, &len)) {
129		7018	blk = add_utf8_line(buffer, blk, line, len);
130			}
131			}
132
133	1/2 ✓ Branch 0 (21→22) taken 28 times. ✗ Branch 1 (21→23) not taken.	28	if (blk) {
134		28	add_block(buffer, blk);
135			}
136
137			return true;
138			}
139
140		1	static size_t unix_to_dos (
141			FileEncoder *enc,
142			const unsigned char *buf,
143			size_t size
144			) {
145			// TODO: Pass in Buffer::nl and make this size adjustment more conservative
146			// (it's resized to handle the worst possible case, despite the fact that we
147			// already have the number of newlines pre-computed)
148	1/2 ✓ Branch 0 (2→3) taken 1 times. ✗ Branch 1 (2→8) not taken.	1	if (enc->nsize < size * 2) {
149		1	enc->nsize = size * 2;
150		1	enc->nbuf = xrealloc(enc->nbuf, enc->nsize);
151			}
152
153			// TODO: Optimize this loop, by making use of memccpy(3)
154			size_t d = 0;
155	2/2 ✓ Branch 0 (9→5) taken 21 times. ✓ Branch 1 (9→10) taken 1 times.	22	for (size_t s = 0; s < size; s++) {
156		21	unsigned char ch = buf[s];
157	2/2 ✓ Branch 0 (5→6) taken 3 times. ✓ Branch 1 (5→7) taken 18 times.	21	if (ch == '\n') {
158		3	enc->nbuf[d++] = '\r';
159			}
160		21	enc->nbuf[d++] = ch;
161			}
162
163		1	return d;
164			}
165
166			#if ICONV_DISABLE == 1 // iconv not available; use basic, UTF-8 implementation:
167
168			bool conversion_supported_by_iconv (
169			const char* UNUSED_ARG(from),
170			const char* UNUSED_ARG(to)
171			) {
172			errno = EINVAL;
173			return false;
174			}
175
176			FileEncoder file_encoder(const char *encoding, bool crlf, int fd)
177			{
178			if (unlikely(!encoding_is_utf8(encoding))) {
179			BUG("unsupported conversion; should have been handled earlier");
180			}
181
182			return (FileEncoder) {
183			.crlf = crlf,
184			.fd = fd,
185			};
186			}
187
188			void file_encoder_free(FileEncoder *enc)
189			{
190			free(enc->nbuf);
191			}
192
193			ssize_t file_encoder_write(FileEncoder *enc, const unsigned char *buf, size_t n)
194			{
195			if (unlikely(enc->crlf)) {
196			n = unix_to_dos(enc, buf, n);
197			buf = enc->nbuf;
198			}
199			return xwrite_all(enc->fd, buf, n);
200			}
201
202			size_t file_encoder_get_nr_errors(const FileEncoder* UNUSED_ARG(enc))
203			{
204			return 0;
205			}
206
207			bool file_decoder_read(Buffer *buffer, const unsigned char *buf, size_t size)
208			{
209			return file_decoder_read_utf8(buffer, buf, size);
210			}
211
212			#else // ICONV_DISABLE != 1; use full iconv implementation:
213
214			#include <iconv.h>
215
216			// UTF-8 encoding of U+00BF (inverted question mark; "¿")
217			#define REPLACEMENT "\xc2\xbf"
218
219			struct cconv {
220			iconv_t cd;
221			char *obuf;
222			size_t osize;
223			size_t opos;
224			size_t consumed;
225			size_t errors;
226
227			// Temporary input buffer
228			char tbuf[16];
229			size_t tcount;
230
231			// REPLACEMENT character, in target encoding
232			char rbuf[4];
233			size_t rcount;
234
235			// Input character size in bytes, or zero for UTF-8
236			size_t char_size;
237			};
238
239		1	static struct cconv *create(iconv_t cd)
240			{
241		1	struct cconv *c = xnew0(struct cconv, 1);
242		1	c->cd = cd;
243		1	c->osize = 8192;
244		1	c->obuf = xmalloc(c->osize);
245		1	return c;
246			}
247
248		2	static size_t iconv_wrapper (
249			iconv_t cd,
250			const char **restrict inbuf,
251			size_t *restrict inbytesleft,
252			char **restrict outbuf,
253			size_t *restrict outbytesleft
254			) {
255			// POSIX defines the second parameter of iconv(3) as "char **restrict"
256			// but NetBSD declares it as "const char **restrict"
257			#ifdef __NetBSD__
258			const char **restrict in = inbuf;
259			#else
260		2	char **restrict in = (char **restrict)inbuf;
261			#endif
262
263		2	return iconv(cd, in, inbytesleft, outbuf, outbytesleft);
264			}
265
266		✗	static void resize_obuf(struct cconv *c)
267			{
268		✗	c->osize = xmul(2, c->osize);
269		✗	c->obuf = xrealloc(c->obuf, c->osize);
270		✗	}
271
272		✗	static void add_replacement(struct cconv *c)
273			{
274		✗	if (c->osize - c->opos < 4) {
275		✗	resize_obuf(c);
276			}
277
278		✗	memcpy(c->obuf + c->opos, c->rbuf, c->rcount);
279		✗	c->opos += c->rcount;
280		✗	}
281
282		✗	static size_t handle_invalid(struct cconv *c, const char *buf, size_t count)
283			{
284		✗	LOG_DEBUG("%zu %zu", c->char_size, count);
285		✗	add_replacement(c);
286		✗	if (c->char_size == 0) {
287			// Converting from UTF-8
288		✗	size_t idx = 0;
289		✗	CodePoint u = u_get_char(buf, count, &idx);
290		✗	LOG_DEBUG("U+%04" PRIX32, u);
291		✗	return idx;
292			}
293		✗	if (c->char_size > count) {
294			// wtf
295		✗	return 1;
296			}
297			return c->char_size;
298			}
299
300		1	static int xiconv(struct cconv *c, const char **ib, size_t *ic)
301			{
302		1	while (1) {
303		1	char *ob = c->obuf + c->opos;
304		1	size_t oc = c->osize - c->opos;
305		1	size_t rc = iconv_wrapper(c->cd, ib, ic, &ob, &oc);
306		1	c->opos = ob - c->obuf;
307	1/2 ✗ Branch 0 (4→5) not taken. ✓ Branch 1 (4→12) taken 1 times.	1	if (rc == (size_t)-1) {
308		✗	switch (errno) {
309		✗	case EILSEQ:
310		✗	c->errors++;
311			// Reset
312		✗	iconv(c->cd, NULL, NULL, NULL, NULL);
313		✗	return errno;
314			case EINVAL:
315			return errno;
316		✗	case E2BIG:
317		✗	resize_obuf(c);
318		✗	continue;
319		✗	default:
320		−	BUG("iconv: %s", strerror(errno));
321			}
322			} else {
323		1	c->errors += rc;
324			}
325		1	return 0;
326			}
327			}
328
329		✗	static size_t convert_incomplete(struct cconv *c, const char *input, size_t len)
330			{
331		✗	size_t ipos = 0;
332		✗	while (c->tcount < sizeof(c->tbuf) && ipos < len) {
333		✗	c->tbuf[c->tcount++] = input[ipos++];
334		✗	const char *ib = c->tbuf;
335		✗	size_t ic = c->tcount;
336		✗	int rc = xiconv(c, &ib, &ic);
337		✗	if (ic > 0) {
338		✗	memmove(c->tbuf, ib, ic);
339			}
340		✗	c->tcount = ic;
341		✗	if (rc == EINVAL) {
342			// Incomplete character at end of input buffer; try again
343			// with more input data
344		✗	continue;
345			}
346		✗	if (rc == EILSEQ) {
347			// Invalid multibyte sequence
348		✗	size_t skip = handle_invalid(c, c->tbuf, c->tcount);
349		✗	c->tcount -= skip;
350		✗	if (c->tcount > 0) {
351		✗	LOG_DEBUG("tcount=%zu, skip=%zu", c->tcount, skip);
352		✗	memmove(c->tbuf, c->tbuf + skip, c->tcount);
353		✗	continue;
354			}
355		✗	return ipos;
356			}
357		✗	break;
358			}
359
360		✗	LOG_DEBUG("%zu %zu", ipos, c->tcount);
361		✗	return ipos;
362			}
363
364		1	static void cconv_process(struct cconv *c, const char *input, size_t len)
365			{
366	1/2 ✗ Branch 0 (2→3) not taken. ✓ Branch 1 (2→4) taken 1 times.	1	if (c->consumed > 0) {
367		✗	size_t fill = c->opos - c->consumed;
368		✗	memmove(c->obuf, c->obuf + c->consumed, fill);
369		✗	c->opos = fill;
370		✗	c->consumed = 0;
371			}
372
373	1/2 ✗ Branch 0 (4→5) not taken. ✓ Branch 1 (4→7) taken 1 times.	1	if (c->tcount > 0) {
374		✗	size_t ipos = convert_incomplete(c, input, len);
375		✗	input += ipos;
376		✗	len -= ipos;
377			}
378
379		1	const char *ib = input;
380	2/2 ✓ Branch 0 (17→8) taken 1 times. ✓ Branch 1 (17→18) taken 1 times.	2	for (size_t ic = len; ic > 0; ) {
381		1	int r = xiconv(c, &ib, &ic);
382	1/2 ✗ Branch 0 (9→10) not taken. ✓ Branch 1 (9→13) taken 1 times.	1	if (r == EINVAL) {
383			// Incomplete character at end of input buffer
384		✗	if (ic < sizeof(c->tbuf)) {
385		✗	memcpy(c->tbuf, ib, ic);
386		✗	c->tcount = ic;
387			} else {
388			// FIXME
389		✗	}
390		✗	ic = 0;
391		✗	continue;
392			}
393	1/2 ✗ Branch 0 (13→14) not taken. ✓ Branch 1 (13→16) taken 1 times.	1	if (r == EILSEQ) {
394			// Invalid multibyte sequence
395		✗	size_t skip = handle_invalid(c, ib, ic);
396		✗	ic -= skip;
397		✗	ib += skip;
398		✗	continue;
399			}
400			}
401		1	}
402
403		✗	static struct cconv *cconv_to_utf8(const char *encoding)
404			{
405		✗	iconv_t cd = iconv_open("UTF-8", encoding);
406		✗	if (cd == (iconv_t)-1) {
407			return NULL;
408			}
409
410		✗	struct cconv *c = create(cd);
411		✗	c->rcount = copyliteral(c->rbuf, REPLACEMENT);
412
413		✗	if (str_has_prefix(encoding, "UTF-16")) {
414		✗	c->char_size = 2;
415		✗	} else if (str_has_prefix(encoding, "UTF-32")) {
416		✗	c->char_size = 4;
417			} else {
418		✗	c->char_size = 1;
419			}
420
421			return c;
422			}
423
424		1	static void encode_replacement(struct cconv *c)
425			{
426		1	static const unsigned char rep[] = REPLACEMENT;
427		1	const char *ib = rep;
428		1	char *ob = c->rbuf;
429		1	size_t ic = STRLEN(REPLACEMENT);
430		1	size_t oc = sizeof(c->rbuf);
431		1	size_t rc = iconv_wrapper(c->cd, &ib, &ic, &ob, &oc);
432
433	1/2 ✓ Branch 0 (3→4) taken 1 times. ✗ Branch 1 (3→5) not taken.	1	if (rc == (size_t)-1) {
434		1	c->rbuf[0] = '\xbf';
435		1	c->rcount = 1;
436			} else {
437		✗	c->rcount = ob - c->rbuf;
438			}
439		1	}
440
441		1	static struct cconv *cconv_from_utf8(const char *encoding)
442			{
443		1	iconv_t cd = iconv_open(encoding, "UTF-8");
444	1/2 ✓ Branch 0 (3→4) taken 1 times. ✗ Branch 1 (3→7) not taken.	1	if (cd == (iconv_t)-1) {
445			return NULL;
446			}
447		1	struct cconv *c = create(cd);
448		1	encode_replacement(c);
449		1	return c;
450			}
451
452		1	static void cconv_flush(struct cconv *c)
453			{
454	1/2 ✗ Branch 0 (2→3) not taken. ✓ Branch 1 (2→6) taken 1 times.	1	if (c->tcount > 0) {
455			// Replace incomplete character at end of input buffer
456		✗	LOG_DEBUG("incomplete character at EOF");
457		✗	add_replacement(c);
458		✗	c->tcount = 0;
459			}
460		1	}
461
462		✗	static char *cconv_consume_line(struct cconv *c, size_t *len)
463			{
464		✗	char *line = c->obuf + c->consumed;
465		✗	char *nl = memchr(line, '\n', c->opos - c->consumed);
466		✗	if (!nl) {
467		✗	*len = 0;
468		✗	return NULL;
469			}
470
471		✗	size_t n = nl - line + 1;
472		✗	c->consumed += n;
473		✗	*len = n;
474		✗	return line;
475			}
476
477		1	static char *cconv_consume_all(struct cconv *c, size_t *len)
478			{
479		1	char *buf = c->obuf + c->consumed;
480		1	*len = c->opos - c->consumed;
481		1	c->consumed = c->opos;
482		1	return buf;
483			}
484
485		1	static void cconv_free(struct cconv *c)
486			{
487		1	BUG_ON(!c);
488		1	iconv_close(c->cd);
489		1	free(c->obuf);
490		1	free(c);
491		1	}
492
493		2	bool conversion_supported_by_iconv(const char *from, const char *to)
494			{
495	2/4 ✓ Branch 0 (2→3) taken 2 times. ✗ Branch 1 (2→4) not taken. ✗ Branch 2 (3→4) not taken. ✓ Branch 3 (3→5) taken 2 times.	2	if (unlikely(from[0] == '\0' || to[0] == '\0')) {
496		✗	errno = EINVAL;
497		✗	return false;
498			}
499
500		2	iconv_t cd = iconv_open(to, from);
501	1/2 ✓ Branch 0 (6→7) taken 2 times. ✗ Branch 1 (6→9) not taken.	2	if (cd == (iconv_t)-1) {
502			return false;
503			}
504
505		2	iconv_close(cd);
506		2	return true;
507			}
508
509		21	FileEncoder file_encoder(const char *encoding, bool crlf, int fd)
510			{
511		21	struct cconv *cconv = NULL;
512	2/2 ✓ Branch 0 (3→4) taken 1 times. ✓ Branch 1 (3→7) taken 20 times.	21	if (unlikely(!encoding_is_utf8(encoding))) {
513		1	cconv = cconv_from_utf8(encoding);
514	1/2 ✗ Branch 0 (5→6) not taken. ✓ Branch 1 (5→7) taken 1 times.	1	if (!cconv) {
515		−	BUG("unsupported conversion; should have been handled earlier");
516			}
517			}
518
519		21	return (FileEncoder) {
520			.cconv = cconv,
521			.crlf = crlf,
522			.fd = fd,
523			};
524			}
525
526		21	void file_encoder_free(FileEncoder *enc)
527			{
528	2/2 ✓ Branch 0 (2→3) taken 1 times. ✓ Branch 1 (2→4) taken 20 times.	21	if (enc->cconv) {
529		1	cconv_free(enc->cconv);
530			}
531		21	free(enc->nbuf);
532		21	}
533
534			// NOTE: buf must contain whole characters!
535		21	ssize_t file_encoder_write (
536			FileEncoder *enc,
537			const unsigned char *buf,
538			size_t size
539			) {
540	2/2 ✓ Branch 0 (2→3) taken 1 times. ✓ Branch 1 (2→5) taken 20 times.	21	if (unlikely(enc->crlf)) {
541		1	size = unix_to_dos(enc, buf, size);
542		1	buf = enc->nbuf;
543			}
544	2/2 ✓ Branch 0 (5→6) taken 1 times. ✓ Branch 1 (5→9) taken 20 times.	21	if (unlikely(enc->cconv)) {
545		1	cconv_process(enc->cconv, buf, size);
546		1	cconv_flush(enc->cconv);
547		1	buf = cconv_consume_all(enc->cconv, &size);
548			}
549		21	return xwrite_all(enc->fd, buf, size);
550			}
551
552		21	size_t file_encoder_get_nr_errors(const FileEncoder *enc)
553			{
554	2/2 ✓ Branch 0 (2→3) taken 1 times. ✓ Branch 1 (2→4) taken 20 times.	21	return enc->cconv ? enc->cconv->errors : 0;
555			}
556
557		✗	static bool fill(FileDecoder *dec)
558			{
559		✗	if (dec->ipos == dec->isize) {
560			return false;
561			}
562
563			// Smaller than cconv.obuf to make realloc less likely
564		✗	size_t max = 7 * 1024;
565
566		✗	size_t icount = MIN(dec->isize - dec->ipos, max);
567		✗	cconv_process(dec->cconv, dec->ibuf + dec->ipos, icount);
568		✗	dec->ipos += icount;
569		✗	if (dec->ipos == dec->isize) {
570			// Must be flushed after all input has been fed
571		✗	cconv_flush(dec->cconv);
572			}
573			return true;
574			}
575
576		✗	static bool decode_and_read_line(FileDecoder *dec, const char **linep, size_t *lenp)
577			{
578		✗	char *line;
579		✗	size_t len;
580		✗	while (1) {
581		✗	line = cconv_consume_line(dec->cconv, &len);
582		✗	if (line || !fill(dec)) {
583			break;
584			}
585			}
586
587		✗	if (line) {
588			// Newline not wanted
589		✗	len--;
590			} else {
591		✗	line = cconv_consume_all(dec->cconv, &len);
592		✗	if (len == 0) {
593			return false;
594			}
595			}
596
597		✗	*linep = line;
598		✗	*lenp = len;
599		✗	return true;
600			}
601
602		31	bool file_decoder_read(Buffer *buffer, const unsigned char *buf, size_t size)
603			{
604	1/2 ✓ Branch 0 (3→4) taken 31 times. ✗ Branch 1 (3→5) not taken.	31	if (encoding_is_utf8(buffer->encoding)) {
605		31	return file_decoder_read_utf8(buffer, buf, size);
606			}
607
608		✗	struct cconv *cconv = cconv_to_utf8(buffer->encoding);
609		✗	if (!cconv) {
610			return false;
611			}
612
613		✗	FileDecoder dec = {
614			.ibuf = buf,
615			.isize = size,
616			.cconv = cconv,
617			};
618
619		✗	const char *line;
620		✗	size_t len;
621
622		✗	if (decode_and_read_line(&dec, &line, &len)) {
623		✗	if (len && line[len - 1] == '\r') {
624		✗	buffer->crlf_newlines = true;
625		✗	len--;
626			}
627		✗	Block *blk = add_utf8_line(buffer, NULL, line, len);
628		✗	while (decode_and_read_line(&dec, &line, &len)) {
629		✗	if (buffer->crlf_newlines && len && line[len - 1] == '\r') {
630		✗	len--;
631			}
632		✗	blk = add_utf8_line(buffer, blk, line, len);
633			}
634		✗	if (blk) {
635		✗	add_block(buffer, blk);
636			}
637			}
638
639		✗	cconv_free(cconv);
640		✗	return true;
641			}
642
643			#endif
644

Function (Line)	Call count	Line coverage	Branch coverage	Condition coverage	Block coverage
add_block (line 31)	called 56 times	100.0%	-%	-%	100.0%
add_replacement (line 272)	not called	0.0%	0.0%	-%	0.0%
add_utf8_line (line 37)	called 7316 times	88.9%	62.5%	-%	70.0%
cconv_consume_all (line 477)	called 1 time	100.0%	-%	-%	100.0%
cconv_consume_line (line 462)	not called	0.0%	0.0%	-%	0.0%
cconv_flush (line 452)	called 1 time	50.0%	50.0%	-%	40.0%
cconv_free (line 485)	called 1 time	100.0%	-%	-%	75.0%
cconv_from_utf8 (line 441)	called 1 time	100.0%	50.0%	-%	100.0%
cconv_process (line 364)	called 1 time	34.6%	50.0%	-%	52.9%
cconv_to_utf8 (line 403)	not called	0.0%	0.0%	-%	0.0%
conversion_supported_by_iconv (line 493)	called 2 times	75.0%	50.0%	-%	87.5%
convert_incomplete (line 329)	not called	0.0%	0.0%	-%	0.0%
create (line 239)	called 1 time	100.0%	-%	-%	100.0%
decode_and_read_line (line 576)	not called	0.0%	0.0%	-%	0.0%
encode_replacement (line 424)	called 1 time	91.7%	50.0%	-%	80.0%
file_decoder_read (line 602)	called 31 times	14.3%	5.0%	-%	16.7%
file_decoder_read_utf8 (line 94)	called 31 times	90.5%	85.0%	-%	95.5%
file_encoder (line 509)	called 21 times	100.0%	75.0%	-%	83.3%
file_encoder_free (line 526)	called 21 times	100.0%	100.0%	-%	100.0%
file_encoder_get_nr_errors (line 552)	called 21 times	100.0%	100.0%	-%	100.0%
file_encoder_write (line 535)	called 21 times	100.0%	100.0%	-%	100.0%
fill (line 557)	not called	0.0%	0.0%	-%	0.0%
handle_invalid (line 282)	not called	0.0%	0.0%	-%	0.0%
iconv_wrapper (line 248)	called 2 times	100.0%	-%	-%	100.0%
read_utf8_line (line 72)	called 7347 times	100.0%	100.0%	-%	100.0%
resize_obuf (line 266)	not called	0.0%	-%	-%	0.0%
unix_to_dos (line 140)	called 1 time	100.0%	83.3%	-%	100.0%
xiconv (line 300)	called 1 time	50.0%	16.7%	-%	41.7%