Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <stddef.h> | ||
2 | #include "unicode.h" | ||
3 | #include "unidata.h" | ||
4 | #include "ascii.h" | ||
5 | |||
6 | #define BISEARCH(u, arr) bisearch((u), (arr), ARRAYLEN(arr) - 1) | ||
7 | |||
8 | 419 | static bool bisearch(CodePoint u, const CodepointRange *range, size_t max) | |
9 | { | ||
10 |
4/4✓ Branch 0 (2→3) taken 199 times.
✓ Branch 1 (2→10) taken 220 times.
✓ Branch 2 (3→9) taken 190 times.
✓ Branch 3 (3→10) taken 9 times.
|
419 | if (u < range[0].first || u > range[max].last) { |
11 | return false; | ||
12 | } | ||
13 | |||
14 | size_t min = 0; | ||
15 |
2/2✓ Branch 0 (9→4) taken 1446 times.
✓ Branch 1 (9→10) taken 82 times.
|
1528 | while (max >= min) { |
16 | 1446 | const size_t mid = (min + max) / 2; | |
17 |
2/2✓ Branch 0 (4→5) taken 845 times.
✓ Branch 1 (4→6) taken 601 times.
|
1446 | if (u > range[mid].last) { |
18 | 845 | min = mid + 1; | |
19 |
2/2✓ Branch 0 (6→7) taken 493 times.
✓ Branch 1 (6→10) taken 108 times.
|
601 | } else if (u < range[mid].first) { |
20 | 493 | max = mid - 1; | |
21 | } else { | ||
22 | return true; | ||
23 | } | ||
24 | } | ||
25 | |||
26 | return false; | ||
27 | } | ||
28 | |||
29 | // Returns true for any whitespace character that isn't "non-breaking", | ||
30 | // i.e. one that is used purely to separate words and may, for example, | ||
31 | // be "broken" (changed to a newline) by hard wrapping. | ||
32 | 415 | bool u_is_breakable_whitespace(CodePoint u) | |
33 | { | ||
34 |
2/2✓ Branch 0 (2→3) taken 287 times.
✓ Branch 1 (2→4) taken 128 times.
|
415 | switch (u) { |
35 | case '\t': | ||
36 | case '\n': | ||
37 | case '\v': | ||
38 | case '\f': | ||
39 | case '\r': | ||
40 | case ' ': | ||
41 | case 0x1680: // Ogham space mark | ||
42 | case 0x2000: // En quad | ||
43 | case 0x2001: // Em quad | ||
44 | case 0x2002: // En space | ||
45 | case 0x2003: // Em space | ||
46 | case 0x2004: // 3-per-em space | ||
47 | case 0x2005: // 4-per-em space | ||
48 | case 0x2006: // 6-per-em space | ||
49 | case 0x2008: // Punctuation space | ||
50 | case 0x2009: // Thin space | ||
51 | case 0x200A: // Hair space | ||
52 | case 0x200B: // Zero width space | ||
53 | case 0x205F: // Medium mathematical space | ||
54 | case 0x3000: // Ideographic space | ||
55 | return true; | ||
56 | } | ||
57 | 287 | return false; | |
58 | } | ||
59 | |||
60 | 4169 | bool u_is_word_char(CodePoint u) | |
61 | { | ||
62 |
2/4✓ Branch 0 (2→3) taken 4169 times.
✗ Branch 1 (2→5) not taken.
✓ Branch 2 (3→4) taken 4169 times.
✗ Branch 3 (3→5) not taken.
|
4169 | return u >= 0x80 || is_alnum_or_underscore(u); |
63 | } | ||
64 | |||
65 | 137 | static bool u_is_default_ignorable(CodePoint u) | |
66 | { | ||
67 | 137 | return BISEARCH(u, default_ignorable); | |
68 | } | ||
69 | |||
70 | 112 | bool u_is_unprintable(CodePoint u) | |
71 | { | ||
72 |
4/4✓ Branch 0 (3→4) taken 38 times.
✓ Branch 1 (3→6) taken 74 times.
✓ Branch 2 (4→5) taken 5 times.
✓ Branch 3 (4→6) taken 33 times.
|
112 | return BISEARCH(u, unprintable) || !u_is_unicode(u); |
73 | } | ||
74 | |||
75 | 14 | bool u_is_special_whitespace(CodePoint u) | |
76 | { | ||
77 | 14 | return BISEARCH(u, special_whitespace); | |
78 | } | ||
79 | |||
80 | 148 | static bool u_is_nonspacing_mark(CodePoint u) | |
81 | { | ||
82 | 148 | return BISEARCH(u, nonspacing_mark); | |
83 | } | ||
84 | |||
85 | 148 | bool u_is_zero_width(CodePoint u) | |
86 | { | ||
87 |
4/4✓ Branch 0 (3→4) taken 137 times.
✓ Branch 1 (3→7) taken 11 times.
✓ Branch 2 (5→6) taken 8 times.
✓ Branch 3 (5→7) taken 129 times.
|
148 | return u_is_nonspacing_mark(u) || u_is_default_ignorable(u); |
88 | } | ||
89 | |||
90 | 8 | static bool u_is_double_width(CodePoint u) | |
91 | { | ||
92 | 8 | return BISEARCH(u, double_width); | |
93 | } | ||
94 | |||
95 | // Get the display width of `u`, where "display width" means the number | ||
96 | // of terminal columns occupied (either by the terminal's rendered font | ||
97 | // glyph or our own multi-column rendering) | ||
98 | 475 | unsigned int u_char_width(CodePoint u) | |
99 | { | ||
100 |
2/2✓ Branch 0 (2→3) taken 450 times.
✓ Branch 1 (2→5) taken 25 times.
|
475 | if (likely(u < 0x80)) { |
101 |
2/2✓ Branch 0 (3→4) taken 444 times.
✓ Branch 1 (3→13) taken 6 times.
|
450 | if (unlikely(ascii_iscntrl(u))) { |
102 | return 2; // Rendered by u_set_char() in caret notation (e.g. ^@) | ||
103 | } | ||
104 | 444 | return 1; | |
105 |
2/2✓ Branch 0 (6→7) taken 21 times.
✓ Branch 1 (6→13) taken 4 times.
|
25 | } else if (u_is_zero_width(u)) { |
106 | return 0; | ||
107 |
2/2✓ Branch 0 (8→9) taken 16 times.
✓ Branch 1 (8→13) taken 5 times.
|
21 | } else if (u_is_unprintable(u)) { |
108 | return 4; // Rendered by u_set_char() as <xx> | ||
109 |
2/2✓ Branch 0 (9→10) taken 8 times.
✓ Branch 1 (9→13) taken 8 times.
|
16 | } else if (u < 0x1100) { |
110 | return 1; | ||
111 |
2/2✓ Branch 0 (11→12) taken 7 times.
✓ Branch 1 (11→13) taken 1 times.
|
8 | } else if (u_is_double_width(u)) { |
112 | 7 | return 2; // Rendered by (modern) terminals as a 2 column glyph (e.g. 🎧) | |
113 | } | ||
114 | |||
115 | return 1; | ||
116 | } | ||
117 |