Line |
Branch |
Exec |
Source |
1 |
|
|
#ifndef UTIL_UNICODE_H |
2 |
|
|
#define UTIL_UNICODE_H |
3 |
|
|
|
4 |
|
|
#include <stdbool.h> |
5 |
|
|
#include <stdint.h> |
6 |
|
|
#include "macros.h" |
7 |
|
|
|
8 |
|
|
// Work around some musl-targeted toolchains failing to include this |
9 |
|
|
// header automatically and thus failing to define __STDC_ISO_10646__ |
10 |
|
|
#if HAS_INCLUDE(<stdc-predef.h>) |
11 |
|
|
# include <stdc-predef.h> // NOLINT(portability-restrict-system-includes) |
12 |
|
|
#endif |
13 |
|
|
|
14 |
|
|
// SANE_WCTYPE is defined only when wint_t is wide enough to hold every
// code point up to 0x10FFFF and the implementation defines
// __STDC_ISO_10646__. When it is defined, the u_is_lower(), u_is_upper(),
// u_to_lower() and u_to_upper() helpers further down are built on
// <wctype.h>; otherwise the ASCII-only fallbacks are compiled instead.
#if defined(WINT_MAX) && (WINT_MAX >= 0x10FFFF) && defined(__STDC_ISO_10646__) |
15 |
|
|
# define SANE_WCTYPE 1 |
16 |
|
|
#endif |
17 |
|
|
|
18 |
|
|
// The maximum Unicode codepoint allowed by RFC 3629 |
19 |
|
|
#define UNICODE_MAX_VALID_CODEPOINT UINT32_C(0x10FFFF) |
20 |
|
|
|
21 |
|
|
typedef uint32_t CodePoint; |
22 |
|
|
|
23 |
|
1347 |
// Return true if `u` does not exceed UNICODE_MAX_VALID_CODEPOINT.
// Only the upper bound is checked; surrogates are not rejected here.
static inline bool u_is_unicode(CodePoint u)
{
    const bool beyond_codespace = (u > UNICODE_MAX_VALID_CODEPOINT);
    return !beyond_codespace;
}
27 |
|
|
|
28 |
|
|
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G2630 |
29 |
|
45 |
// Return true if `u` falls in the surrogate range U+D800..U+DFFF
// (both ends inclusive)
static inline bool u_is_surrogate(CodePoint u)
{
    if (u < 0xD800) {
        return false;
    }
    return !(u > 0xDFFF);
}
33 |
|
|
|
34 |
|
630 |
// Return true if `u` is below U+0020 or lies in U+007F..U+009F
// (both ends inclusive)
static inline bool u_is_cntrl(CodePoint u)
{
    if (u < 0x20) {
        return true;
    }
    return !(u < 0x7F || u > 0x9F);
}
38 |
|
|
|
39 |
|
899 |
// Return true if `u` is one of the 26 ASCII uppercase letters 'A'..'Z'
static inline bool u_is_ascii_upper(CodePoint u)
{
    // Values below 'A' wrap around to a very large unsigned number, so
    // a single comparison checks both ends of the range
    const CodePoint offset = u - 'A';
    return offset < 26;
}
43 |
|
|
|
44 |
|
|
#ifdef SANE_WCTYPE |
45 |
|
|
#include <wctype.h> // NOLINT(portability-restrict-system-includes) |
46 |
|
|
|
47 |
|
14 |
// Return true if iswlower() reports `u` as a lowercase character.
// This variant is compiled only when SANE_WCTYPE is defined.
static inline bool u_is_lower(CodePoint u)
{
    const wint_t wc = (wint_t)u;
    return iswlower(wc) != 0;
}
51 |
|
|
|
52 |
|
18 |
// Return true if iswupper() reports `u` as an uppercase character.
// This variant is compiled only when SANE_WCTYPE is defined.
static inline bool u_is_upper(CodePoint u)
{
    const wint_t wc = (wint_t)u;
    return iswupper(wc) != 0;
}
56 |
|
|
|
57 |
|
7 |
// Return the result of towlower() for `u`, converted back to CodePoint.
// This variant is compiled only when SANE_WCTYPE is defined.
static inline CodePoint u_to_lower(CodePoint u)
{
    const wint_t lowered = towlower((wint_t)u);
    return (CodePoint)lowered;
}
61 |
|
|
|
62 |
|
9 |
// Return the result of towupper() for `u`, converted back to CodePoint.
// This variant is compiled only when SANE_WCTYPE is defined.
static inline CodePoint u_to_upper(CodePoint u)
{
    const wint_t raised = towupper((wint_t)u);
    return (CodePoint)raised;
}
66 |
|
|
#else |
67 |
|
|
// Fallback used when SANE_WCTYPE is not defined: return true only for
// the 26 ASCII lowercase letters 'a'..'z'
static inline bool u_is_lower(CodePoint u)
{
    // Values below 'a' wrap around to a very large unsigned number, so
    // a single comparison checks both ends of the range
    const CodePoint offset = u - 'a';
    return offset < 26;
}
71 |
|
|
|
72 |
|
|
// Fallback used when SANE_WCTYPE is not defined: return true only for
// the 26 ASCII uppercase letters 'A'..'Z'
static inline bool u_is_upper(CodePoint u)
{
    // Values below 'A' wrap around to a very large unsigned number, so
    // a single comparison checks both ends of the range
    const CodePoint offset = u - 'A';
    return offset < 26;
}
76 |
|
|
|
77 |
|
|
// Fallback used when SANE_WCTYPE is not defined: map characters accepted
// by u_is_upper() to the value 32 higher and return everything else
// unchanged
static inline CodePoint u_to_lower(CodePoint u)
{
    if (!u_is_upper(u)) {
        return u;
    }
    return u + 32;
}
81 |
|
|
|
82 |
|
|
// Fallback used when SANE_WCTYPE is not defined: map characters accepted
// by u_is_lower() to the value 32 lower and return everything else
// unchanged
static inline CodePoint u_to_upper(CodePoint u)
{
    if (!u_is_lower(u)) {
        return u;
    }
    return u - 32;
}
86 |
|
|
#endif // SANE_WCTYPE |
87 |
|
|
|
88 |
|
|
// Declarations only: the bodies of the following functions are not part
// of this header. CONST_FN is not defined in this header either
// (presumably it is provided by "macros.h" -- confirm there). Note that
// u_is_unprintable() and u_is_zero_width() are the two declarations
// that are NOT marked CONST_FN.
bool u_is_breakable_whitespace(CodePoint u) CONST_FN; |
89 |
|
|
bool u_is_word_char(CodePoint u) CONST_FN; |
90 |
|
|
bool u_is_unprintable(CodePoint u); |
91 |
|
|
bool u_is_special_whitespace(CodePoint u) CONST_FN; |
92 |
|
|
bool u_is_zero_width(CodePoint u); |
93 |
|
|
unsigned int u_char_width(CodePoint uch) CONST_FN; |
94 |
|
|
|
95 |
|
|
#endif |
96 |
|
|
|