dte test coverage


Directory: ./
File: src/util/unicode.c
Date: 2025-02-14 16:55:22
Exec Total Coverage
Lines: 34 34 100.0%
Functions: 10 10 100.0%
Branches: 34 36 94.4%

Line Branch Exec Source
1 #include <stddef.h>
2 #include "unicode.h"
3 #include "unidata.h"
4 #include "ascii.h"
5
6 #define BISEARCH(u, arr) bisearch((u), (arr), ARRAYLEN(arr) - 1)
7
8 419 static bool bisearch(CodePoint u, const CodepointRange *range, size_t max)
9 {
10
4/4
✓ Branch 0 (2→3) taken 199 times.
✓ Branch 1 (2→10) taken 220 times.
✓ Branch 2 (3→9) taken 190 times.
✓ Branch 3 (3→10) taken 9 times.
419 if (u < range[0].first || u > range[max].last) {
11 return false;
12 }
13
14 size_t min = 0;
15
2/2
✓ Branch 0 (9→4) taken 1446 times.
✓ Branch 1 (9→10) taken 82 times.
1528 while (max >= min) {
16 1446 const size_t mid = (min + max) / 2;
17
2/2
✓ Branch 0 (4→5) taken 845 times.
✓ Branch 1 (4→6) taken 601 times.
1446 if (u > range[mid].last) {
18 845 min = mid + 1;
19
2/2
✓ Branch 0 (6→7) taken 493 times.
✓ Branch 1 (6→10) taken 108 times.
601 } else if (u < range[mid].first) {
20 493 max = mid - 1;
21 } else {
22 return true;
23 }
24 }
25
26 return false;
27 }
28
29 // Returns true for any whitespace character that isn't "non-breaking",
30 // i.e. one that is used purely to separate words and may, for example,
31 // be "broken" (changed to a newline) by hard wrapping.
32 415 bool u_is_breakable_whitespace(CodePoint u)
33 {
34
2/2
✓ Branch 0 (2→3) taken 287 times.
✓ Branch 1 (2→4) taken 128 times.
415 switch (u) {
35 case '\t':
36 case '\n':
37 case '\v':
38 case '\f':
39 case '\r':
40 case ' ':
41 case 0x1680: // Ogham space mark
42 case 0x2000: // En quad
43 case 0x2001: // Em quad
44 case 0x2002: // En space
45 case 0x2003: // Em space
46 case 0x2004: // 3-per-em space
47 case 0x2005: // 4-per-em space
48 case 0x2006: // 6-per-em space
49 case 0x2008: // Punctuation space
50 case 0x2009: // Thin space
51 case 0x200A: // Hair space
52 case 0x200B: // Zero width space
53 case 0x205F: // Medium mathematical space
54 case 0x3000: // Ideographic space
55 return true;
56 }
57 287 return false;
58 }
59
60 4169 bool u_is_word_char(CodePoint u)
61 {
62
2/4
✓ Branch 0 (2→3) taken 4169 times.
✗ Branch 1 (2→5) not taken.
✓ Branch 2 (3→4) taken 4169 times.
✗ Branch 3 (3→5) not taken.
4169 return u >= 0x80 || is_alnum_or_underscore(u);
63 }
64
65 137 static bool u_is_default_ignorable(CodePoint u)
66 {
67 137 return BISEARCH(u, default_ignorable);
68 }
69
70 112 bool u_is_unprintable(CodePoint u)
71 {
72
4/4
✓ Branch 0 (3→4) taken 38 times.
✓ Branch 1 (3→6) taken 74 times.
✓ Branch 2 (4→5) taken 5 times.
✓ Branch 3 (4→6) taken 33 times.
112 return BISEARCH(u, unprintable) || !u_is_unicode(u);
73 }
74
75 14 bool u_is_special_whitespace(CodePoint u)
76 {
77 14 return BISEARCH(u, special_whitespace);
78 }
79
80 148 static bool u_is_nonspacing_mark(CodePoint u)
81 {
82 148 return BISEARCH(u, nonspacing_mark);
83 }
84
85 148 bool u_is_zero_width(CodePoint u)
86 {
87
4/4
✓ Branch 0 (3→4) taken 137 times.
✓ Branch 1 (3→7) taken 11 times.
✓ Branch 2 (5→6) taken 8 times.
✓ Branch 3 (5→7) taken 129 times.
148 return u_is_nonspacing_mark(u) || u_is_default_ignorable(u);
88 }
89
90 8 static bool u_is_double_width(CodePoint u)
91 {
92 8 return BISEARCH(u, double_width);
93 }
94
95 // Get the display width of `u`, where "display width" means the number
96 // of terminal columns occupied (either by the terminal's rendered font
97 // glyph or our own multi-column rendering)
98 475 unsigned int u_char_width(CodePoint u)
99 {
100
2/2
✓ Branch 0 (2→3) taken 450 times.
✓ Branch 1 (2→5) taken 25 times.
475 if (likely(u < 0x80)) {
101
2/2
✓ Branch 0 (3→4) taken 444 times.
✓ Branch 1 (3→13) taken 6 times.
450 if (unlikely(ascii_iscntrl(u))) {
102 return 2; // Rendered by u_set_char() in caret notation (e.g. ^@)
103 }
104 444 return 1;
105
2/2
✓ Branch 0 (6→7) taken 21 times.
✓ Branch 1 (6→13) taken 4 times.
25 } else if (u_is_zero_width(u)) {
106 return 0;
107
2/2
✓ Branch 0 (8→9) taken 16 times.
✓ Branch 1 (8→13) taken 5 times.
21 } else if (u_is_unprintable(u)) {
108 return 4; // Rendered by u_set_char() as <xx>
109
2/2
✓ Branch 0 (9→10) taken 8 times.
✓ Branch 1 (9→13) taken 8 times.
16 } else if (u < 0x1100) {
110 return 1;
111
2/2
✓ Branch 0 (11→12) taken 7 times.
✓ Branch 1 (11→13) taken 1 times.
8 } else if (u_is_double_width(u)) {
112 7 return 2; // Rendered by (modern) terminals as a 2 column glyph (e.g. 🎧)
113 }
114
115 return 1;
116 }
117