| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <stddef.h> | ||
| 2 | #include "unicode.h" | ||
| 3 | #include "unidata.h" | ||
| 4 | #include "ascii.h" | ||
| 5 | |||
| 6 | #define BISEARCH(u, arr) bisearch((u), (arr), ARRAYLEN(arr) - 1) | ||
| 7 | |||
| 8 | 513 | static bool bisearch(CodePoint u, const CodepointRange *range, size_t max) | |
| 9 | { | ||
| 10 |
4/4✓ Branch 0 (2→3) taken 293 times.
✓ Branch 1 (2→10) taken 220 times.
✓ Branch 2 (3→9) taken 233 times.
✓ Branch 3 (3→10) taken 60 times.
|
513 | if (u < range[0].first || u > range[max].last) { |
| 11 | return false; | ||
| 12 | } | ||
| 13 | |||
| 14 | size_t min = 0; | ||
| 15 |
2/2✓ Branch 0 (9→4) taken 1709 times.
✓ Branch 1 (9→10) taken 109 times.
|
1818 | while (max >= min) { |
| 16 | 1709 | const size_t mid = (min + max) / 2; | |
| 17 |
2/2✓ Branch 0 (4→5) taken 1079 times.
✓ Branch 1 (4→6) taken 630 times.
|
1709 | if (u > range[mid].last) { |
| 18 | 1079 | min = mid + 1; | |
| 19 |
2/2✓ Branch 0 (6→7) taken 506 times.
✓ Branch 1 (6→10) taken 124 times.
|
630 | } else if (u < range[mid].first) { |
| 20 | 506 | max = mid - 1; | |
| 21 | } else { | ||
| 22 | return true; | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | return false; | ||
| 27 | } | ||
| 28 | |||
| 29 | // Returns true for any whitespace character that isn't "non-breaking", | ||
| 30 | // i.e. one that is used purely to separate words and may, for example, | ||
| 31 | // be "broken" (changed to a newline) by hard wrapping. | ||
| 32 | 655 | bool u_is_breakable_whitespace(CodePoint u) | |
| 33 | { | ||
| 34 |
2/2✓ Branch 0 (2→3) taken 455 times.
✓ Branch 1 (2→4) taken 200 times.
|
655 | switch (u) { |
| 35 | case '\t': | ||
| 36 | case '\n': | ||
| 37 | case '\v': | ||
| 38 | case '\f': | ||
| 39 | case '\r': | ||
| 40 | case ' ': | ||
| 41 | case 0x1680: // Ogham space mark | ||
| 42 | case 0x2000: // En quad | ||
| 43 | case 0x2001: // Em quad | ||
| 44 | case 0x2002: // En space | ||
| 45 | case 0x2003: // Em space | ||
| 46 | case 0x2004: // 3-per-em space | ||
| 47 | case 0x2005: // 4-per-em space | ||
| 48 | case 0x2006: // 6-per-em space | ||
| 49 | case 0x2008: // Punctuation space | ||
| 50 | case 0x2009: // Thin space | ||
| 51 | case 0x200A: // Hair space | ||
| 52 | case 0x200B: // Zero width space | ||
| 53 | case 0x205F: // Medium mathematical space | ||
| 54 | case 0x3000: // Ideographic space | ||
| 55 | return true; | ||
| 56 | } | ||
| 57 | 455 | return false; | |
| 58 | } | ||
| 59 | |||
| 60 | 4337 | bool u_is_word_char(CodePoint u) | |
| 61 | { | ||
| 62 |
4/4✓ Branch 0 (2→3) taken 4313 times.
✓ Branch 1 (2→5) taken 24 times.
✓ Branch 2 (3→4) taken 4265 times.
✓ Branch 3 (3→5) taken 48 times.
|
4337 | return u >= 0x80 || is_alnum_or_underscore(u); |
| 63 | } | ||
| 64 | |||
| 65 | 163 | static bool u_is_default_ignorable(CodePoint u) | |
| 66 | { | ||
| 67 | 163 | return BISEARCH(u, default_ignorable); | |
| 68 | } | ||
| 69 | |||
| 70 | 145 | bool u_is_unprintable(CodePoint u) | |
| 71 | { | ||
| 72 |
4/4✓ Branch 0 (3→4) taken 64 times.
✓ Branch 1 (3→6) taken 81 times.
✓ Branch 2 (4→5) taken 22 times.
✓ Branch 3 (4→6) taken 42 times.
|
145 | return BISEARCH(u, unprintable) || !u_is_unicode(u); |
| 73 | } | ||
| 74 | |||
| 75 | 14 | bool u_is_special_whitespace(CodePoint u) | |
| 76 | { | ||
| 77 | 14 | return BISEARCH(u, special_whitespace); | |
| 78 | } | ||
| 79 | |||
| 80 | 174 | static bool u_is_nonspacing_mark(CodePoint u) | |
| 81 | { | ||
| 82 | 174 | return BISEARCH(u, nonspacing_mark); | |
| 83 | } | ||
| 84 | |||
| 85 | 174 | bool u_is_zero_width(CodePoint u) | |
| 86 | { | ||
| 87 |
4/4✓ Branch 0 (3→4) taken 163 times.
✓ Branch 1 (3→7) taken 11 times.
✓ Branch 2 (5→6) taken 8 times.
✓ Branch 3 (5→7) taken 155 times.
|
174 | return u_is_nonspacing_mark(u) || u_is_default_ignorable(u); |
| 88 | } | ||
| 89 | |||
| 90 | 17 | static bool u_is_double_width(CodePoint u) | |
| 91 | { | ||
| 92 | 17 | return BISEARCH(u, double_width); | |
| 93 | } | ||
| 94 | |||
| 95 | // Get the display width of `u`, where "display width" means the number | ||
| 96 | // of terminal columns occupied (either by the terminal's rendered font | ||
| 97 | // glyph or our own multi-column rendering) | ||
| 98 | 561 | unsigned int u_char_width(CodePoint u) | |
| 99 | { | ||
| 100 |
2/2✓ Branch 0 (2→3) taken 510 times.
✓ Branch 1 (2→5) taken 51 times.
|
561 | if (likely(u < 0x80)) { |
| 101 |
2/2✓ Branch 0 (3→4) taken 486 times.
✓ Branch 1 (3→13) taken 24 times.
|
510 | if (unlikely(ascii_iscntrl(u))) { |
| 102 | return 2; // Rendered by u_set_char() in caret notation (e.g. ^@) | ||
| 103 | } | ||
| 104 | 486 | return 1; | |
| 105 |
2/2✓ Branch 0 (6→7) taken 47 times.
✓ Branch 1 (6→13) taken 4 times.
|
51 | } else if (u_is_zero_width(u)) { |
| 106 | return 0; | ||
| 107 |
2/2✓ Branch 0 (8→9) taken 25 times.
✓ Branch 1 (8→13) taken 22 times.
|
47 | } else if (u_is_unprintable(u)) { |
| 108 | return 4; // Rendered by u_set_char() as <xx> | ||
| 109 |
2/2✓ Branch 0 (9→10) taken 17 times.
✓ Branch 1 (9→13) taken 8 times.
|
25 | } else if (u < 0x1100) { |
| 110 | return 1; | ||
| 111 |
2/2✓ Branch 0 (11→12) taken 16 times.
✓ Branch 1 (11→13) taken 1 times.
|
17 | } else if (u_is_double_width(u)) { |
| 112 | 16 | return 2; // Rendered by (modern) terminals as a 2 column glyph (e.g. 🎧) | |
| 113 | } | ||
| 114 | |||
| 115 | return 1; | ||
| 116 | } | ||
| 117 |