dte test coverage

Directory:	./
File:	src/util/utf8.c
Date:	2025-02-14 16:55:22

	Exec	Total	Coverage
Lines:	117	125	93.6%
Functions:	12	13	92.3%
Branches:	38	46	82.6%

  
      Line
      Branch
      Exec
      Source
    
      #include <stdbool.h>
    
      #include <stdint.h>
    
      #include "utf8.h"
    
      #include "ascii.h"
    
      #include "debug.h"
    
      #include "numtostr.h"
    
      enum {
    
          I = -1, // Invalid byte
    
          C = 0,  // Continuation byte
    
      };
    
      // https://en.wikipedia.org/wiki/UTF-8#Byte_map
    
      // https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506
    
      static const int8_t seq_len_table[256] = {
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
    
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
    
          C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 80..8F
    
          C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 90..9F
    
          C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // A0..AF
    
          C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // B0..BF
    
          I, I, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..CF
    
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
    
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
    
          4, 4, 4, 4, 4, I, I, I, I, I, I, I, I, I, I, I  // F0..FF
    
      };
    
      55
      static int u_seq_len(unsigned char first_byte)
    
      {
    
      55
          int8_t len = seq_len_table[first_byte];
    
      55
          BUG_ON(len < I || len > UTF8_MAX_SEQ_LEN);
    
      55
          return len;
    
      }
    
      // https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27288
    
      74
      static bool u_is_continuation_byte(unsigned char u)
    
      {
    
          // (u & 0b11000000) == 0b10000000
    
      74
          return (u & 0xC0) == 0x80;
    
      }
    
      // https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506
    
      // https://en.wikipedia.org/wiki/UTF-8#Overlong_encodings
    
      // https://en.wikipedia.org/wiki/UTF-8#Error_handling
    
      33
      static bool u_seq_len_ok(CodePoint u, int len)
    
      {
    
      33
          return u_char_size(u) == len;
    
      }
    
      /*
    
       * Len  Mask         Note
    
       * -------------------------------------------------
    
       * 1    0111 1111    Not supported by this function!
    
       * 2    0001 1111
    
       * 3    0000 1111
    
       * 4    0000 0111
    
       * 5    0000 0011    Forbidden by RFC 3629
    
       * 6    0000 0001    Forbidden by RFC 3629
    
       */
    
      35
      static unsigned int u_get_first_byte_mask(unsigned int len)
    
      {
    
      35
          BUG_ON(len < 2);
    
      35
          BUG_ON(len > UTF8_MAX_SEQ_LEN);
    
      35
          return (0x80 >> len) - 1;
    
      }
    
      49
      size_t u_str_width(const unsigned char *str)
    
      {
    
      49
          size_t i = 0, w = 0;
    
        2/2✓ Branch 0 (5→3) taken 266 times.
✓ Branch 1 (5→6) taken 49 times.

      315
          while (str[i]) {
    
      266
              w += u_char_width(u_str_get_char(str, &i));
    
          }
    
      49
          return w;
    
      }
    
      18
      CodePoint u_prev_char(const unsigned char *str, size_t *idx)
    
      {
    
      18
          size_t i = *idx;
    
      18
          unsigned char ch = str[--i];
    
        2/2✓ Branch 0 (2→3) taken 7 times.
✓ Branch 1 (2→4) taken 11 times.

      18
          if (likely(ch < 0x80)) {
    
      7
              *idx = i;
    
      7
              return (CodePoint)ch;
    
          }
    
        2/2✓ Branch 0 (4→5) taken 5 times.
✓ Branch 1 (4→6) taken 6 times.

      11
          if (!u_is_continuation_byte(ch)) {
    
      5
              goto invalid;
    
          }
    
      6
          CodePoint u = ch & 0x3f;
    
        1/2✓ Branch 0 (18→7) taken 15 times.
✗ Branch 1 (18→19) not taken.

      15
          for (unsigned int count = 1, shift = 6; i > 0; ) {
    
      15
              ch = str[--i];
    
      15
              unsigned int len = u_seq_len(ch);
    
      15
              count++;
    
        2/2✓ Branch 0 (8→9) taken 9 times.
✓ Branch 1 (8→12) taken 6 times.

      15
              if (len == 0) {
    
        1/2✗ Branch 0 (9→10) not taken.
✓ Branch 1 (9→11) taken 9 times.

      9
                  if (count == 4) {
    
                      // Too long sequence
    
                      break;
    
                  }
    
      9
                  u |= (ch & 0x3f) << shift;
    
      9
                  shift += 6;
    
        1/2✗ Branch 0 (12→13) not taken.
✓ Branch 1 (12→14) taken 6 times.

      6
              } else if (count != len) {
    
                  // Incorrect length
    
                  break;
    
              } else {
    
      6
                  u |= (ch & u_get_first_byte_mask(len)) << shift;
    
        1/2✗ Branch 0 (15→16) not taken.
✓ Branch 1 (15→17) taken 6 times.

      6
                  if (!u_seq_len_ok(u, len)) {
    
                      break;
    
                  }
    
      6
                  *idx = i;
    
      6
                  return u;
    
              }
    
          }
    
      ✗
      invalid:
    
      5
          *idx = *idx - 1;
    
      5
          u = str[*idx];
    
      5
          return -u;
    
      }
    
      316
      CodePoint u_str_get_char(const unsigned char *str, size_t *idx)
    
      {
    
      316
          size_t i = *idx;
    
      316
          CodePoint u = str[i];
    
        2/2✓ Branch 0 (2→3) taken 301 times.
✓ Branch 1 (2→4) taken 15 times.

      316
          if (likely(u < 0x80)) {
    
      301
              *idx = i + 1;
    
      301
              return u;
    
          }
    
      15
          return u_get_nonascii(str, i + UTF8_MAX_SEQ_LEN, idx);
    
      }
    
      5858
      CodePoint u_get_char(const unsigned char *str, size_t size, size_t *idx)
    
      {
    
      5858
          size_t i = *idx;
    
      5858
          CodePoint u = str[i];
    
        2/2✓ Branch 0 (2→3) taken 5835 times.
✓ Branch 1 (2→4) taken 23 times.

      5858
          if (likely(u < 0x80)) {
    
      5835
              *idx = i + 1;
    
      5835
              return u;
    
          }
    
      23
          return u_get_nonascii(str, size, idx);
    
      }
    
      40
      CodePoint u_get_nonascii(const unsigned char *str, size_t size, size_t *idx)
    
      {
    
      40
          size_t i = *idx;
    
      40
          unsigned int first = str[i++];
    
      40
          int len = u_seq_len(first);
    
        4/4✓ Branch 0 (3→4) taken 30 times.
✓ Branch 1 (3→5) taken 10 times.
✓ Branch 2 (4→5) taken 1 times.
✓ Branch 3 (4→6) taken 29 times.

      40
          if (unlikely(len < 2 || len > size - i + 1)) {
    
      11
              goto invalid;
    
          }
    
      29
          CodePoint u = first & u_get_first_byte_mask(len);
    
      29
          int c = len - 1;
    
      63
          do {
    
      63
              unsigned char ch = str[i++];
    
        2/2✓ Branch 0 (8→9) taken 2 times.
✓ Branch 1 (8→10) taken 61 times.

      63
              if (!u_is_continuation_byte(ch)) {
    
      2
                  goto invalid;
    
              }
    
      61
              u = (u << 6) | (ch & 0x3f);
    
        2/2✓ Branch 0 (10→8) taken 34 times.
✓ Branch 1 (10→11) taken 27 times.

      61
          } while (--c);
    
        2/2✓ Branch 0 (11→12) taken 2 times.
✓ Branch 1 (11→13) taken 25 times.

      27
          if (!u_seq_len_ok(u, len)) {
    
              // Overlong encoding
    
      2
              goto invalid;
    
          }
    
      25
          *idx = i;
    
      25
          return u;
    
      15
      invalid:
    
      15
          *idx += 1;
    
      15
          return -first;
    
      }
    
      147
      size_t u_set_char_raw(char *buf, CodePoint u)
    
      {
    
      147
          unsigned int prefix = 0;
    
      147
          size_t len = u_char_size(u);
    
      147
          BUG_ON(len == 0 || len > UTF8_MAX_SEQ_LEN);
    
        4/4✓ Branch 0 (4→5) taken 8 times.
✓ Branch 1 (4→6) taken 9 times.
✓ Branch 2 (4→7) taken 4 times.
✓ Branch 3 (4→8) taken 126 times.

      147
          switch (len) {
    
      8
          case 4:
    
      8
              buf[3] = (u & 0x3F) | 0x80;
    
      8
              u >>= 6;
    
      8
              prefix |= 0xF0;
    
              // Fallthrough
    
      17
          case 3:
    
      17
              buf[2] = (u & 0x3F) | 0x80;
    
      17
              u >>= 6;
    
      17
              prefix |= 0xE0;
    
              // Fallthrough
    
      21
          case 2:
    
      21
              buf[1] = (u & 0x3F) | 0x80;
    
      21
              u >>= 6;
    
      21
              prefix |= 0xC0;
    
          }
    
      147
          buf[0] = (u & 0xFF) | prefix;
    
      147
          return len;
    
      }
    
      320
      size_t u_set_char(char *buf, CodePoint u)
    
      {
    
        2/2✓ Branch 0 (2→3) taken 301 times.
✓ Branch 1 (2→6) taken 19 times.

      320
          if (likely(u <= 0x7F)) {
    
      301
              size_t i = 0;
    
        2/2✓ Branch 0 (3→4) taken 5 times.
✓ Branch 1 (3→5) taken 296 times.

      301
              if (unlikely(ascii_iscntrl(u))) {
    
                  // Use caret notation for control chars:
    
      5
                  buf[i++] = '^';
    
      5
                  u = (u + 64) & 0x7F;
    
              }
    
      301
              buf[i++] = u;
    
      301
              return i;
    
          }
    
        2/2✓ Branch 0 (7→8) taken 8 times.
✓ Branch 1 (7→9) taken 11 times.

      19
          if (u_is_unprintable(u)) {
    
      8
              return u_set_hex(buf, u);
    
          }
    
      11
          BUG_ON(u > 0x10FFFF); // (implied by !u_is_unprintable(u))
    
      11
          return u_set_char_raw(buf, u);
    
      }
    
      8
      size_t u_set_hex(char buf[U_SET_HEX_LEN], CodePoint u)
    
      {
    
      8
          buf[0] = '<';
    
        2/2✓ Branch 0 (2→3) taken 5 times.
✓ Branch 1 (2→4) taken 3 times.

      8
          if (!u_is_unicode(u)) {
    
              // Invalid byte (negated)
    
      5
              u *= -1;
    
      5
              hex_encode_byte(buf + 1, u & 0xFF);
    
          } else {
    
      3
              buf[1] = '?';
    
      3
              buf[2] = '?';
    
          }
    
      8
          buf[3] = '>';
    
      8
          return U_SET_HEX_LEN;
    
      }
    
      /*
    
       * Total width of skipped characters is stored back to @width.
    
       *
    
       * Stored @width can be 1 more than given width if the last skipped
    
       * character was double width or even 3 more if the last skipped
    
       * character was invalid (<xx>).
    
       *
    
       * Returns number of bytes skipped.
    
       */
    
      ✗
      size_t u_skip_chars(const char *str, int *width)
    
      {
    
      ✗
          int w = *width;
    
      ✗
          size_t idx = 0;
    
      ✗
          while (str[idx] && w > 0) {
    
      ✗
              w -= u_char_width(u_str_get_char(str, &idx));
    
          }
    
          // Add 1..3 if skipped 'too much' (the last char was double
    
          // width or invalid (<xx>))
    
      ✗
          *width -= w;
    
      ✗
          return idx;
    
      }

Line	Branch	Exec	Source
1			#include <stdbool.h>
2			#include <stdint.h>
3			#include "utf8.h"
4			#include "ascii.h"
5			#include "debug.h"
6			#include "numtostr.h"
7
8			enum {
9			I = -1, // Invalid byte
10			C = 0, // Continuation byte
11			};
12
13			// https://en.wikipedia.org/wiki/UTF-8#Byte_map
14			// https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506
15			static const int8_t seq_len_table[256] = {
16			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
17			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
18			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
19			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
20			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
21			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
22			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
23			1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
24			C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 80..8F
25			C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 90..9F
26			C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // A0..AF
27			C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // B0..BF
28			I, I, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..CF
29			2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
30			3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
31			4, 4, 4, 4, 4, I, I, I, I, I, I, I, I, I, I, I // F0..FF
32			};
33
34		55	static int u_seq_len(unsigned char first_byte)
35			{
36		55	int8_t len = seq_len_table[first_byte];
37		55	BUG_ON(len < I || len > UTF8_MAX_SEQ_LEN);
38		55	return len;
39			}
40
41			// https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27288
42		74	static bool u_is_continuation_byte(unsigned char u)
43			{
44			// (u & 0b11000000) == 0b10000000
45		74	return (u & 0xC0) == 0x80;
46			}
47
48			// https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506
49			// https://en.wikipedia.org/wiki/UTF-8#Overlong_encodings
50			// https://en.wikipedia.org/wiki/UTF-8#Error_handling
51		33	static bool u_seq_len_ok(CodePoint u, int len)
52			{
53		33	return u_char_size(u) == len;
54			}
55
56			/*
57			* Len Mask Note
58			* -------------------------------------------------
59			* 1 0111 1111 Not supported by this function!
60			* 2 0001 1111
61			* 3 0000 1111
62			* 4 0000 0111
63			* 5 0000 0011 Forbidden by RFC 3629
64			* 6 0000 0001 Forbidden by RFC 3629
65			*/
66		35	static unsigned int u_get_first_byte_mask(unsigned int len)
67			{
68		35	BUG_ON(len < 2);
69		35	BUG_ON(len > UTF8_MAX_SEQ_LEN);
70		35	return (0x80 >> len) - 1;
71			}
72
73		49	size_t u_str_width(const unsigned char *str)
74			{
75		49	size_t i = 0, w = 0;
76	2/2 ✓ Branch 0 (5→3) taken 266 times. ✓ Branch 1 (5→6) taken 49 times.	315	while (str[i]) {
77		266	w += u_char_width(u_str_get_char(str, &i));
78			}
79		49	return w;
80			}
81
82		18	CodePoint u_prev_char(const unsigned char *str, size_t *idx)
83			{
84		18	size_t i = *idx;
85		18	unsigned char ch = str[--i];
86	2/2 ✓ Branch 0 (2→3) taken 7 times. ✓ Branch 1 (2→4) taken 11 times.	18	if (likely(ch < 0x80)) {
87		7	*idx = i;
88		7	return (CodePoint)ch;
89			}
90
91	2/2 ✓ Branch 0 (4→5) taken 5 times. ✓ Branch 1 (4→6) taken 6 times.	11	if (!u_is_continuation_byte(ch)) {
92		5	goto invalid;
93			}
94
95		6	CodePoint u = ch & 0x3f;
96	1/2 ✓ Branch 0 (18→7) taken 15 times. ✗ Branch 1 (18→19) not taken.	15	for (unsigned int count = 1, shift = 6; i > 0; ) {
97		15	ch = str[--i];
98		15	unsigned int len = u_seq_len(ch);
99		15	count++;
100	2/2 ✓ Branch 0 (8→9) taken 9 times. ✓ Branch 1 (8→12) taken 6 times.	15	if (len == 0) {
101	1/2 ✗ Branch 0 (9→10) not taken. ✓ Branch 1 (9→11) taken 9 times.	9	if (count == 4) {
102			// Too long sequence
103			break;
104			}
105		9	u |= (ch & 0x3f) << shift;
106		9	shift += 6;
107	1/2 ✗ Branch 0 (12→13) not taken. ✓ Branch 1 (12→14) taken 6 times.	6	} else if (count != len) {
108			// Incorrect length
109			break;
110			} else {
111		6	u |= (ch & u_get_first_byte_mask(len)) << shift;
112	1/2 ✗ Branch 0 (15→16) not taken. ✓ Branch 1 (15→17) taken 6 times.	6	if (!u_seq_len_ok(u, len)) {
113			break;
114			}
115		6	*idx = i;
116		6	return u;
117			}
118			}
119
120		✗	invalid:
121		5	*idx = *idx - 1;
122		5	u = str[*idx];
123		5	return -u;
124			}
125
126		316	CodePoint u_str_get_char(const unsigned char *str, size_t *idx)
127			{
128		316	size_t i = *idx;
129		316	CodePoint u = str[i];
130	2/2 ✓ Branch 0 (2→3) taken 301 times. ✓ Branch 1 (2→4) taken 15 times.	316	if (likely(u < 0x80)) {
131		301	*idx = i + 1;
132		301	return u;
133			}
134		15	return u_get_nonascii(str, i + UTF8_MAX_SEQ_LEN, idx);
135			}
136
137		5858	CodePoint u_get_char(const unsigned char *str, size_t size, size_t *idx)
138			{
139		5858	size_t i = *idx;
140		5858	CodePoint u = str[i];
141	2/2 ✓ Branch 0 (2→3) taken 5835 times. ✓ Branch 1 (2→4) taken 23 times.	5858	if (likely(u < 0x80)) {
142		5835	*idx = i + 1;
143		5835	return u;
144			}
145		23	return u_get_nonascii(str, size, idx);
146			}
147
148		40	CodePoint u_get_nonascii(const unsigned char *str, size_t size, size_t *idx)
149			{
150		40	size_t i = *idx;
151		40	unsigned int first = str[i++];
152		40	int len = u_seq_len(first);
153	4/4 ✓ Branch 0 (3→4) taken 30 times. ✓ Branch 1 (3→5) taken 10 times. ✓ Branch 2 (4→5) taken 1 times. ✓ Branch 3 (4→6) taken 29 times.	40	if (unlikely(len < 2 || len > size - i + 1)) {
154		11	goto invalid;
155			}
156
157		29	CodePoint u = first & u_get_first_byte_mask(len);
158		29	int c = len - 1;
159		63	do {
160		63	unsigned char ch = str[i++];
161	2/2 ✓ Branch 0 (8→9) taken 2 times. ✓ Branch 1 (8→10) taken 61 times.	63	if (!u_is_continuation_byte(ch)) {
162		2	goto invalid;
163			}
164		61	u = (u << 6) | (ch & 0x3f);
165	2/2 ✓ Branch 0 (10→8) taken 34 times. ✓ Branch 1 (10→11) taken 27 times.	61	} while (--c);
166
167	2/2 ✓ Branch 0 (11→12) taken 2 times. ✓ Branch 1 (11→13) taken 25 times.	27	if (!u_seq_len_ok(u, len)) {
168			// Overlong encoding
169		2	goto invalid;
170			}
171
172		25	*idx = i;
173		25	return u;
174
175		15	invalid:
176		15	*idx += 1;
177		15	return -first;
178			}
179
180		147	size_t u_set_char_raw(char *buf, CodePoint u)
181			{
182		147	unsigned int prefix = 0;
183		147	size_t len = u_char_size(u);
184		147	BUG_ON(len == 0 || len > UTF8_MAX_SEQ_LEN);
185
186	4/4 ✓ Branch 0 (4→5) taken 8 times. ✓ Branch 1 (4→6) taken 9 times. ✓ Branch 2 (4→7) taken 4 times. ✓ Branch 3 (4→8) taken 126 times.	147	switch (len) {
187		8	case 4:
188		8	buf[3] = (u & 0x3F) | 0x80;
189		8	u >>= 6;
190		8	prefix |= 0xF0;
191			// Fallthrough
192		17	case 3:
193		17	buf[2] = (u & 0x3F) | 0x80;
194		17	u >>= 6;
195		17	prefix |= 0xE0;
196			// Fallthrough
197		21	case 2:
198		21	buf[1] = (u & 0x3F) | 0x80;
199		21	u >>= 6;
200		21	prefix |= 0xC0;
201			}
202
203		147	buf[0] = (u & 0xFF) | prefix;
204		147	return len;
205			}
206
207		320	size_t u_set_char(char *buf, CodePoint u)
208			{
209	2/2 ✓ Branch 0 (2→3) taken 301 times. ✓ Branch 1 (2→6) taken 19 times.	320	if (likely(u <= 0x7F)) {
210		301	size_t i = 0;
211	2/2 ✓ Branch 0 (3→4) taken 5 times. ✓ Branch 1 (3→5) taken 296 times.	301	if (unlikely(ascii_iscntrl(u))) {
212			// Use caret notation for control chars:
213		5	buf[i++] = '^';
214		5	u = (u + 64) & 0x7F;
215			}
216		301	buf[i++] = u;
217		301	return i;
218			}
219
220	2/2 ✓ Branch 0 (7→8) taken 8 times. ✓ Branch 1 (7→9) taken 11 times.	19	if (u_is_unprintable(u)) {
221		8	return u_set_hex(buf, u);
222			}
223
224		11	BUG_ON(u > 0x10FFFF); // (implied by !u_is_unprintable(u))
225		11	return u_set_char_raw(buf, u);
226			}
227
228		8	size_t u_set_hex(char buf[U_SET_HEX_LEN], CodePoint u)
229			{
230		8	buf[0] = '<';
231	2/2 ✓ Branch 0 (2→3) taken 5 times. ✓ Branch 1 (2→4) taken 3 times.	8	if (!u_is_unicode(u)) {
232			// Invalid byte (negated)
233		5	u *= -1;
234		5	hex_encode_byte(buf + 1, u & 0xFF);
235			} else {
236		3	buf[1] = '?';
237		3	buf[2] = '?';
238			}
239		8	buf[3] = '>';
240		8	return U_SET_HEX_LEN;
241			}
242
243			/*
244			* Total width of skipped characters is stored back to @width.
245			*
246			* Stored @width can be 1 more than given width if the last skipped
247			* character was double width or even 3 more if the last skipped
248			* character was invalid (<xx>).
249			*
250			* Returns number of bytes skipped.
251			*/
252		✗	size_t u_skip_chars(const char *str, int *width)
253			{
254		✗	int w = *width;
255		✗	size_t idx = 0;
256		✗	while (str[idx] && w > 0) {
257		✗	w -= u_char_width(u_str_get_char(str, &idx));
258			}
259
260			// Add 1..3 if skipped 'too much' (the last char was double
261			// width or invalid (<xx>))
262		✗	*width -= w;
263		✗	return idx;
264			}
265

Function (Line)	Call count	Line coverage	Branch coverage	Condition coverage	Block coverage
u_get_char (line 137)	called 5858 times	100.0%	100.0%	-%	100.0%
u_get_first_byte_mask (line 66)	called 35 times	100.0%	-%	-%	60.0%
u_get_nonascii (line 148)	called 40 times	100.0%	100.0%	-%	100.0%
u_is_continuation_byte (line 42)	called 74 times	100.0%	-%	-%	100.0%
u_prev_char (line 82)	called 18 times	96.2%	71.4%	-%	80.0%
u_seq_len (line 34)	called 55 times	100.0%	-%	-%	66.7%
u_seq_len_ok (line 51)	called 33 times	100.0%	-%	-%	100.0%
u_set_char (line 207)	called 320 times	100.0%	100.0%	-%	90.9%
u_set_char_raw (line 180)	called 147 times	100.0%	100.0%	-%	85.7%
u_set_hex (line 228)	called 8 times	100.0%	100.0%	-%	100.0%
u_skip_chars (line 252)	not called	0.0%	0.0%	-%	0.0%
u_str_get_char (line 126)	called 316 times	100.0%	100.0%	-%	100.0%
u_str_width (line 73)	called 49 times	100.0%	100.0%	-%	100.0%