| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #ifndef UTIL_UNICODE_H | ||
| 2 | #define UTIL_UNICODE_H | ||
| 3 | |||
| 4 | #include <stdbool.h> | ||
| 5 | #include <stdint.h> | ||
| 6 | #include "macros.h" | ||
| 7 | |||
| 8 | // Work around some musl-targeted toolchains failing to include this | ||
| 9 | // header automatically and thus failing to define __STDC_ISO_10646__ | ||
| 10 | #if HAS_INCLUDE(<stdc-predef.h>) | ||
| 11 | # include <stdc-predef.h> // NOLINT(portability-restrict-system-includes) | ||
| 12 | #endif | ||
| 13 | |||
// SANE_WCTYPE is defined (to 1) only when wint_t can represent every
// value up to 0x10FFFF and the implementation defines
// __STDC_ISO_10646__. It selects between the <wctype.h>-backed and the
// ASCII-only versions of the u_is_lower()/u_to_lower() family.
#if defined(WINT_MAX) && defined(__STDC_ISO_10646__)
# if WINT_MAX >= 0x10FFFF
#  define SANE_WCTYPE 1
# endif
#endif
| 17 | |||
// The maximum Unicode codepoint allowed by RFC 3629
#define UNICODE_MAX_VALID_CODEPOINT UINT32_C(0x10FFFF)

// Holds one code point as a plain 32-bit unsigned value. The type itself
// does not restrict the range; use u_is_unicode() to check that a value
// does not exceed UNICODE_MAX_VALID_CODEPOINT.
typedef uint32_t CodePoint;
| 22 | |||
| 23 | 1847 | static inline bool u_is_unicode(CodePoint u) | |
| 24 | { | ||
| 25 | 1847 | return u <= UNICODE_MAX_VALID_CODEPOINT; | |
| 26 | } | ||
| 27 | |||
| 28 | // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G2630 | ||
| 29 | 45 | static inline bool u_is_surrogate(CodePoint u) | |
| 30 | { | ||
| 31 | 45 | return (u >= 0xD800 && u <= 0xDFFF); | |
| 32 | } | ||
| 33 | |||
| 34 | 830 | static inline bool u_is_cntrl(CodePoint u) | |
| 35 | { | ||
| 36 | 830 | return (u < 0x20) || (u >= 0x7F && u <= 0x9F); | |
| 37 | } | ||
| 38 | |||
| 39 | 1099 | static inline bool u_is_ascii_upper(CodePoint u) | |
| 40 | { | ||
| 41 | 1099 | return u - 'A' < 26; | |
| 42 | } | ||
| 43 | |||
| 44 | #ifdef SANE_WCTYPE | ||
| 45 | #include <wctype.h> // NOLINT(portability-restrict-system-includes) | ||
| 46 | // NOLINTBEGIN(*-unsafe-functions) | ||
| 47 | 14 | static inline bool u_is_lower(CodePoint u) {return iswlower((wint_t)u);} | |
| 48 | 18 | static inline bool u_is_upper(CodePoint u) {return iswupper((wint_t)u);} | |
| 49 | 7 | static inline CodePoint u_to_lower(CodePoint u) {return towlower((wint_t)u);} | |
| 50 | 9 | static inline CodePoint u_to_upper(CodePoint u) {return towupper((wint_t)u);} | |
| 51 | // NOLINTEND(*-unsafe-functions) | ||
| 52 | #else | ||
| 53 | static inline bool u_is_lower(CodePoint u) {return (u - 'a') < 26;} | ||
| 54 | static inline bool u_is_upper(CodePoint u) {return (u - 'A') < 26;} | ||
| 55 | static inline CodePoint u_to_lower(CodePoint u) {return u_is_upper(u) ? u + 32 : u;} | ||
| 56 | static inline CodePoint u_to_upper(CodePoint u) {return u_is_lower(u) ? u - 32 : u;} | ||
| 57 | #endif | ||
| 58 | |||
// Classification/width helpers that are only declared in this header;
// their definitions are not visible here.
// NOTE(review): CONST_FN is supplied by "macros.h"; presumably it marks
// a function whose result depends only on its arguments -- confirm.
// NOTE(review): u_is_unprintable() and u_is_zero_width() are declared
// without CONST_FN, unlike their neighbours; verify this is intentional
// before adding the attribute.
bool u_is_breakable_whitespace(CodePoint u) CONST_FN;
bool u_is_word_char(CodePoint u) CONST_FN;
bool u_is_unprintable(CodePoint u);
bool u_is_special_whitespace(CodePoint u) CONST_FN;
bool u_is_zero_width(CodePoint u);
unsigned int u_char_width(CodePoint uch) CONST_FN;
| 65 | |||
| 66 | #endif | ||
| 67 |