Line |
Branch |
Exec |
Source |
1 |
|
|
#ifndef UTIL_UNICODE_H |
2 |
|
|
#define UTIL_UNICODE_H |
3 |
|
|
|
4 |
|
|
#include <stdbool.h> |
5 |
|
|
#include <stdint.h> |
6 |
|
|
#include "macros.h" |
7 |
|
|
|
8 |
|
|
// Work around some musl-targeted toolchains failing to include this |
9 |
|
|
// header automatically and thus failing to define __STDC_ISO_10646__ |
10 |
|
|
#if HAS_INCLUDE(<stdc-predef.h>) |
11 |
|
|
# include <stdc-predef.h> // NOLINT(portability-restrict-system-includes) |
12 |
|
|
#endif |
13 |
|
|
|
14 |
|
|
// SANE_WCTYPE is defined only when the <wctype.h> functions can be used
// directly on Unicode code points: the implementation must advertise
// __STDC_ISO_10646__ and wint_t must be able to hold every value up to
// U+10FFFF
#if defined(__STDC_ISO_10646__) && defined(WINT_MAX) && (WINT_MAX >= 0x10FFFF)
# define SANE_WCTYPE 1
#endif
17 |
|
|
|
18 |
|
|
// Highest code point in the Unicode code space (U+10FFFF), which is
// also the upper limit that RFC 3629 allows UTF-8 to encode
#define UNICODE_MAX_VALID_CODEPOINT UINT32_C(0x10FFFF)

// Holds a single Unicode code point
typedef uint32_t CodePoint;
22 |
|
|
|
23 |
|
1690 |
// Returns true if `u` does not exceed UNICODE_MAX_VALID_CODEPOINT.
// Only the upper bound is checked; no other ranges are excluded.
static inline bool u_is_unicode(CodePoint u)
{
    const bool beyond_limit = (u > UNICODE_MAX_VALID_CODEPOINT);
    return !beyond_limit;
}
27 |
|
|
|
28 |
|
548 |
// Returns true for U+0000..U+001F and for U+007F..U+009F
// (i.e. the C0 controls, DEL and the C1 controls)
static inline bool u_is_cntrl(CodePoint u)
{
    if (u < 0x20) {
        return true;
    }
    return u >= 0x7F && u <= 0x9F;
}
32 |
|
|
|
33 |
|
817 |
// Returns true if `u` is one of the 26 ASCII letters 'A'..'Z'
static inline bool u_is_ascii_upper(CodePoint u)
{
    return u >= 'A' && u <= 'Z';
}
37 |
|
|
|
38 |
|
|
#ifdef SANE_WCTYPE |
39 |
|
|
#include <wctype.h> // NOLINT(portability-restrict-system-includes) |
40 |
|
|
|
41 |
|
14 |
// Returns true if iswlower() from <wctype.h> reports `u` as lowercase.
// Only compiled when SANE_WCTYPE is defined.
static inline bool u_is_lower(CodePoint u)
{
    const wint_t wc = (wint_t)u;
    return iswlower(wc) != 0;
}
45 |
|
|
|
46 |
|
18 |
// Returns true if iswupper() from <wctype.h> reports `u` as uppercase.
// Only compiled when SANE_WCTYPE is defined.
static inline bool u_is_upper(CodePoint u)
{
    const wint_t wc = (wint_t)u;
    return iswupper(wc) != 0;
}
50 |
|
|
|
51 |
|
7 |
// Maps `u` through towlower() from <wctype.h> and returns the result
// as a CodePoint. Only compiled when SANE_WCTYPE is defined.
static inline CodePoint u_to_lower(CodePoint u)
{
    const wint_t lowered = towlower((wint_t)u);
    return (CodePoint)lowered;
}
55 |
|
|
|
56 |
|
9 |
// Maps `u` through towupper() from <wctype.h> and returns the result
// as a CodePoint. Only compiled when SANE_WCTYPE is defined.
static inline CodePoint u_to_upper(CodePoint u)
{
    const wint_t raised = towupper((wint_t)u);
    return (CodePoint)raised;
}
60 |
|
|
#else |
61 |
|
|
// Fallback used when SANE_WCTYPE is not defined: recognizes only the
// ASCII letters 'a'..'z' as lowercase
static inline bool u_is_lower(CodePoint u)
{
    return u >= 'a' && u <= 'z';
}
65 |
|
|
|
66 |
|
|
// Fallback used when SANE_WCTYPE is not defined: recognizes only the
// ASCII letters 'A'..'Z' as uppercase
static inline bool u_is_upper(CodePoint u)
{
    return u >= 'A' && u <= 'Z';
}
70 |
|
|
|
71 |
|
|
// Fallback used when SANE_WCTYPE is not defined: converts 'A'..'Z' to
// 'a'..'z' and returns every other value unchanged
static inline CodePoint u_to_lower(CodePoint u)
{
    if (!u_is_upper(u)) {
        return u;
    }
    // ASCII lowercase letters sit 32 positions above their uppercase forms
    return u + 32;
}
75 |
|
|
|
76 |
|
|
// Fallback used when SANE_WCTYPE is not defined: converts 'a'..'z' to
// 'A'..'Z' and returns every other value unchanged
static inline CodePoint u_to_upper(CodePoint u)
{
    if (!u_is_lower(u)) {
        return u;
    }
    // ASCII uppercase letters sit 32 positions below their lowercase forms
    return u - 32;
}
80 |
|
|
#endif // SANE_WCTYPE |
81 |
|
|
|
82 |
|
|
// Out-of-line helpers; only their declarations are visible in this
// header. NOTE(review): the per-function semantics are defined by the
// implementation file and should be confirmed there.

// Declared with CONST_FN (presumably provided by "macros.h" -- confirm)
bool u_is_breakable_whitespace(CodePoint u) CONST_FN;
bool u_is_special_whitespace(CodePoint u) CONST_FN;
bool u_is_word_char(CodePoint u) CONST_FN;
unsigned int u_char_width(CodePoint uch) CONST_FN;

// Declared without CONST_FN
bool u_is_unprintable(CodePoint u);
bool u_is_zero_width(CodePoint u);
88 |
|
|
|
89 |
|
|
#endif |
90 |
|
|
|