Line | Branch | Exec | Source |
---|---|---|---|
1 | #ifndef UTIL_BIT_H | ||
2 | #define UTIL_BIT_H | ||
3 | |||
4 | #include <stddef.h> | ||
5 | #include <stdint.h> | ||
6 | #include "debug.h" | ||
7 | #include "macros.h" | ||
8 | |||
9 | /* | ||
10 | * The default C compiler on FreeBSD 14.1 (Clang 18.1.5) accepts the | ||
11 | * -std=gnu23 command-line option and then defines __STDC_VERSION__ as | ||
12 | * 202311L, while not actually providing a C23 conforming libc (the | ||
13 | * <stdbit.h> header is missing). This guard condition originally used | ||
14 | * `||` instead of `&&`, but it was changed so as to work around this | ||
15 | * disregard for standards. Fortunately this doesn't really have any | ||
16 | * downsides for other platforms, since __has_include() is also part | ||
17 | * of C23. | ||
18 | */ | ||
19 | #if __STDC_VERSION__ >= 202311L && HAS_INCLUDE(<stdbit.h>) | ||
20 | #include <stdbit.h> | ||
21 | #define USE_STDBIT(fn, arg) return fn(arg) | ||
22 | #else | ||
23 | #define USE_STDBIT(fn, arg) | ||
24 | #endif | ||
25 | |||
26 | #define U64(x) (UINT64_C(x)) | ||
27 | #define U32(x) (UINT32_C(x)) | ||
28 | |||
29 | #ifdef HAS_BUILTIN_TYPES_COMPATIBLE_P | ||
30 | #define HAS_COMPATIBLE_BUILTIN(arg, type, fn) \ | ||
31 | (GNUC_AT_LEAST(3, 4) || HAS_BUILTIN(__builtin_ ## fn)) \ | ||
32 | && __builtin_types_compatible_p(__typeof__(arg), type) | ||
33 | |||
34 | // If there's an appropriate built-in function for `arg`, emit | ||
35 | // a call to it (and eliminate everything below it as dead code) | ||
36 | #define USE_BUILTIN(fn, arg) \ | ||
37 | if (HAS_COMPATIBLE_BUILTIN(arg, unsigned long long, fn ## ll)) { \ | ||
38 | return __builtin_ ## fn ## ll(arg); \ | ||
39 | } else if (HAS_COMPATIBLE_BUILTIN(arg, unsigned long, fn ## l)) { \ | ||
40 | return __builtin_ ## fn ## l(arg); \ | ||
41 | } else if (HAS_COMPATIBLE_BUILTIN(arg, unsigned int, fn)) { \ | ||
42 | return __builtin_ ## fn(arg); \ | ||
43 | } | ||
44 | #else | ||
45 | #define USE_BUILTIN(fn, arg) | ||
46 | #endif | ||
47 | |||
48 | // Population count (cardinality) of set bits | ||
49 | 376 | static inline unsigned int u64_popcount(uint64_t n) | |
50 | { | ||
51 | 376 | USE_STDBIT(stdc_count_ones, n); | |
52 | USE_BUILTIN(popcount, n); | ||
53 | n -= ((n >> 1) & U64(0x5555555555555555)); | ||
54 | n = (n & U64(0x3333333333333333)) + ((n >> 2) & U64(0x3333333333333333)); | ||
55 | n = (n + (n >> 4)) & U64(0x0F0F0F0F0F0F0F0F); | ||
56 | return (n * U64(0x0101010101010101)) >> 56; | ||
57 | } | ||
58 | |||
59 | 104 | static inline unsigned int u32_popcount(uint32_t n) | |
60 | { | ||
61 | 104 | USE_STDBIT(stdc_count_ones, n); | |
62 | USE_BUILTIN(popcount, n); | ||
63 | n -= ((n >> 1) & U32(0x55555555)); | ||
64 | n = (n & U32(0x33333333)) + ((n >> 2) & U32(0x33333333)); | ||
65 | n = (n + (n >> 4)) & U32(0x0F0F0F0F); | ||
66 | return (n * U32(0x01010101)) >> 24; | ||
67 | } | ||
68 | |||
69 | // Count trailing zeros | ||
70 | 168 | static inline unsigned int u32_ctz(uint32_t n) | |
71 | { | ||
72 | 168 | BUG_ON(n == 0); | |
73 | 168 | USE_STDBIT(stdc_trailing_zeros, n); | |
74 | USE_BUILTIN(ctz, n); | ||
75 | return u32_popcount(~n & (n - 1)); | ||
76 | } | ||
77 | |||
78 | // Find position of first (least significant) set bit (starting at 1) | ||
79 | 12 | static inline unsigned int u32_ffs(uint32_t n) | |
80 | { | ||
81 | 12 | USE_BUILTIN(ffs, n); | |
82 | return n ? u32_ctz(n) + 1 : 0; | ||
83 | } | ||
84 | |||
85 | // Extract (isolate) least significant set bit | ||
86 | 83 | static inline uint32_t u32_lsbit(uint32_t x) | |
87 | { | ||
88 | 83 | return x & -x; | |
89 | } | ||
90 | |||
91 | // Return the power of 2 greater than or equal to `x`, or 0 if `x` is | ||
92 | // greater than the largest size_t power of 2 | ||
93 | 441 | static inline size_t next_pow2(size_t x) | |
94 | { | ||
95 |
2/2✓ Branch 0 (2→3) taken 440 times.
✓ Branch 1 (2→7) taken 1 times.
|
441 | if (unlikely(x == 0)) { |
96 | return 1; | ||
97 | } | ||
98 | 440 | x--; | |
99 | UNROLL_LOOP(8) | ||
100 |
2/2✓ Branch 0 (5→4) taken 2640 times.
✓ Branch 1 (5→6) taken 440 times.
|
3080 | for (size_t i = 1, n = BITSIZE(x); i < n; i <<= 1) { |
101 | 2640 | x |= x >> i; | |
102 | } | ||
103 | 440 | return x + 1; | |
104 | } | ||
105 | |||
106 | // Count leading zeros | ||
107 | 9 | static inline unsigned int u64_clz(uint64_t x) | |
108 | { | ||
109 | 9 | BUG_ON(x == 0); | |
110 | 9 | USE_STDBIT(stdc_leading_zeros, x); | |
111 | USE_BUILTIN(clz, x); | ||
112 | x |= x >> 1; | ||
113 | x |= x >> 2; | ||
114 | x |= x >> 4; | ||
115 | x |= x >> 8; | ||
116 | x |= x >> 16; | ||
117 | x |= x >> 32; | ||
118 | return u64_popcount(~x); | ||
119 | } | ||
120 | |||
121 | // Round x up to a multiple of r (which *must* be a power of 2) | ||
122 | 28918 | static inline size_t next_multiple(size_t x, size_t r) | |
123 | DIAGNOSE_IF(!IS_POWER_OF_2(r)) | ||
124 | { | ||
125 | 28918 | r--; | |
126 | 28918 | return (x + r) & ~r; | |
127 | } | ||
128 | |||
129 | // Calculate the number of significant bits in `x` (the minimum number | ||
130 | // of base 2 digits needed to represent it). Note that this returns 0 | ||
131 | // for x=0, but see umax_count_base16_digits() (below) for an example | ||
132 | // of how that can be special cased. | ||
133 | 111 | static inline unsigned int umax_bitwidth(uintmax_t x) | |
134 | { | ||
135 |
2/2✓ Branch 0 (2→3) taken 104 times.
✓ Branch 1 (2→4) taken 7 times.
|
111 | USE_STDBIT(stdc_bit_width, x); |
136 | |||
137 | #if HAS_BUILTIN(__builtin_stdc_bit_width) | ||
138 | return __builtin_stdc_bit_width(x); | ||
139 | #endif | ||
140 | |||
141 | if (BITSIZE(x) == 64) { | ||
142 | return x ? 64u - u64_clz(x) : 0; | ||
143 | } | ||
144 | |||
145 | unsigned int width = 0; | ||
146 | while (x) { | ||
147 | x >>= 1; | ||
148 | width++; | ||
149 | } | ||
150 | return width; | ||
151 | } | ||
152 | |||
153 | // Calculate the number of hexadecimal digits (0-F) needed to represent | ||
154 | // `x` as a string (which is 1 in the case of x=0) | ||
155 | 37 | static inline size_t umax_count_base16_digits(uintmax_t x) | |
156 | { | ||
157 | 37 | unsigned int bit_width = umax_bitwidth(x); | |
158 | |||
159 | // `bit_width` can only be 0 when `x` is 0, so this simply ensures | ||
160 | // 1 is returned in that case | ||
161 | 37 | BUG_ON(!bit_width != !x); // Improves code gen | |
162 | 37 | unsigned int base2_digits = bit_width + !x; | |
163 | |||
164 | // This is equivalent to `(base2_digits >> 2) + !!(base2_digits & 3)`, | ||
165 | // but GCC seems to generate slightly better code when done this way | ||
166 | 37 | return next_multiple(base2_digits, 4) / 4; | |
167 | } | ||
168 | |||
169 | #endif | ||
170 |