dte test coverage


Directory: ./
File: src/util/unicode.h
Date: 2025-09-07 23:01:39
Exec Total Coverage
Lines: 12 12 100.0%
Functions: 8 8 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 #ifndef UTIL_UNICODE_H
2 #define UTIL_UNICODE_H
3
4 #include <stdbool.h>
5 #include <stdint.h>
6 #include "macros.h"
7
8 // Work around some musl-targeted toolchains failing to include this
9 // header automatically and thus failing to define __STDC_ISO_10646__
10 #if HAS_INCLUDE(<stdc-predef.h>)
11 # include <stdc-predef.h> // NOLINT(portability-restrict-system-includes)
12 #endif
13
14 #if defined(WINT_MAX) && (WINT_MAX >= 0x10FFFF) && defined(__STDC_ISO_10646__)
15 # define SANE_WCTYPE 1
16 #endif
17
18 // The maximum Unicode codepoint allowed by RFC 3629
19 #define UNICODE_MAX_VALID_CODEPOINT UINT32_C(0x10FFFF)
20
21 typedef uint32_t CodePoint;
22
23 1847 static inline bool u_is_unicode(CodePoint u)
24 {
25 1847 return u <= UNICODE_MAX_VALID_CODEPOINT;
26 }
27
28 // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G2630
29 45 static inline bool u_is_surrogate(CodePoint u)
30 {
31 45 return (u >= 0xD800 && u <= 0xDFFF);
32 }
33
34 830 static inline bool u_is_cntrl(CodePoint u)
35 {
36 830 return (u < 0x20) || (u >= 0x7F && u <= 0x9F);
37 }
38
39 1099 static inline bool u_is_ascii_upper(CodePoint u)
40 {
41 1099 return u - 'A' < 26;
42 }
43
44 #ifdef SANE_WCTYPE
45 #include <wctype.h> // NOLINT(portability-restrict-system-includes)
46 // NOLINTBEGIN(*-unsafe-functions)
47 14 static inline bool u_is_lower(CodePoint u) {return iswlower((wint_t)u);}
48 18 static inline bool u_is_upper(CodePoint u) {return iswupper((wint_t)u);}
49 7 static inline CodePoint u_to_lower(CodePoint u) {return towlower((wint_t)u);}
50 9 static inline CodePoint u_to_upper(CodePoint u) {return towupper((wint_t)u);}
51 // NOLINTEND(*-unsafe-functions)
52 #else
53 static inline bool u_is_lower(CodePoint u) {return (u - 'a') < 26;}
54 static inline bool u_is_upper(CodePoint u) {return (u - 'A') < 26;}
55 static inline CodePoint u_to_lower(CodePoint u) {return u_is_upper(u) ? u + 32 : u;}
56 static inline CodePoint u_to_upper(CodePoint u) {return u_is_lower(u) ? u - 32 : u;}
57 #endif
58
59 bool u_is_breakable_whitespace(CodePoint u) CONST_FN;
60 bool u_is_word_char(CodePoint u) CONST_FN;
61 bool u_is_unprintable(CodePoint u);
62 bool u_is_special_whitespace(CodePoint u) CONST_FN;
63 bool u_is_zero_width(CodePoint u);
64 unsigned int u_char_width(CodePoint uch) CONST_FN;
65
66 #endif
67