Line | Branch | Exec | Source |
---|---|---|---|
1 | #ifndef UTIL_BIT_H | ||
2 | #define UTIL_BIT_H | ||
3 | |||
4 | #include <stddef.h> | ||
5 | #include <stdint.h> | ||
6 | #include "debug.h" | ||
7 | #include "macros.h" | ||
8 | |||
9 | /* | ||
10 | * The default C compiler on FreeBSD 14.1 (Clang 18.1.5) accepts the | ||
11 | * -std=gnu23 command-line option and then defines __STDC_VERSION__ as | ||
12 | * 202311L, while not actually providing a C23 conforming libc (the | ||
13 | * <stdbit.h> header is missing). This guard condition originally used | ||
14 | * `||` instead of `&&`, but it was changed so as to work around this | ||
15 | * disregard for standards. Fortunately this doesn't really have any | ||
16 | * downsides for other platforms, since __has_include() is also part | ||
17 | * of C23. | ||
18 | */ | ||
19 | #if __STDC_VERSION__ >= 202311L && HAS_INCLUDE(<stdbit.h>) | ||
20 | #include <stdbit.h> | ||
21 | #define USE_STDBIT(fn, arg) return fn(arg) | ||
22 | #else | ||
23 | #define USE_STDBIT(fn, arg) | ||
24 | #endif | ||
25 | |||
26 | #define U64(x) (UINT64_C(x)) | ||
27 | #define U32(x) (UINT32_C(x)) | ||
28 | |||
29 | #ifdef HAS_BUILTIN_TYPES_COMPATIBLE_P | ||
30 | #define HAS_COMPATIBLE_BUILTIN(arg, type, fn) \ | ||
31 | (GNUC_AT_LEAST(3, 4) || HAS_BUILTIN(__builtin_ ## fn)) \ | ||
32 | && __builtin_types_compatible_p(__typeof__(arg), type) | ||
33 | |||
34 | // If there's an appropriate built-in function for `arg`, emit | ||
35 | // a call to it (and eliminate everything below it as dead code) | ||
36 | #define USE_BUILTIN(fn, arg) \ | ||
37 | if (HAS_COMPATIBLE_BUILTIN(arg, unsigned long long, fn ## ll)) { \ | ||
38 | return __builtin_ ## fn ## ll(arg); \ | ||
39 | } else if (HAS_COMPATIBLE_BUILTIN(arg, unsigned long, fn ## l)) { \ | ||
40 | return __builtin_ ## fn ## l(arg); \ | ||
41 | } else if (HAS_COMPATIBLE_BUILTIN(arg, unsigned int, fn)) { \ | ||
42 | return __builtin_ ## fn(arg); \ | ||
43 | } | ||
44 | #else | ||
45 | #define USE_BUILTIN(fn, arg) | ||
46 | #endif | ||
47 | |||
48 | // Population count (cardinality) of set bits | ||
49 | 212 | static inline unsigned int u64_popcount(uint64_t n) | |
50 | { | ||
51 | 212 | USE_STDBIT(stdc_count_ones, n); | |
52 | USE_BUILTIN(popcount, n); | ||
53 | n -= ((n >> 1) & U64(0x5555555555555555)); | ||
54 | n = (n & U64(0x3333333333333333)) + ((n >> 2) & U64(0x3333333333333333)); | ||
55 | n = (n + (n >> 4)) & U64(0x0F0F0F0F0F0F0F0F); | ||
56 | return (n * U64(0x0101010101010101)) >> 56; | ||
57 | } | ||
58 | |||
59 | 104 | static inline unsigned int u32_popcount(uint32_t n) | |
60 | { | ||
61 | 104 | USE_STDBIT(stdc_count_ones, n); | |
62 | USE_BUILTIN(popcount, n); | ||
63 | n -= ((n >> 1) & U32(0x55555555)); | ||
64 | n = (n & U32(0x33333333)) + ((n >> 2) & U32(0x33333333)); | ||
65 | n = (n + (n >> 4)) & U32(0x0F0F0F0F); | ||
66 | return (n * U32(0x01010101)) >> 24; | ||
67 | } | ||
68 | |||
69 | // Count trailing zeros | ||
70 | 168 | static inline unsigned int u32_ctz(uint32_t n) | |
71 | { | ||
72 | 168 | BUG_ON(n == 0); | |
73 | 168 | USE_STDBIT(stdc_trailing_zeros, n); | |
74 | USE_BUILTIN(ctz, n); | ||
75 | return u32_popcount(~n & (n - 1)); | ||
76 | } | ||
77 | |||
78 | 30 | static inline unsigned int umax_ctz(uintmax_t n) | |
79 | { | ||
80 | 30 | BUG_ON(n == 0); | |
81 | 30 | USE_STDBIT(stdc_trailing_zeros, n); | |
82 | USE_BUILTIN(ctz, n); | ||
83 | |||
84 | unsigned int count = 0; | ||
85 | while (n) { | ||
86 | uint32_t part = (uint32_t)(n & 0xFFFFFFFFUL); | ||
87 | count += part ? u32_ctz(part) : 32; | ||
88 | n >>= 32; | ||
89 | } | ||
90 | |||
91 | return count; | ||
92 | } | ||
93 | |||
94 | // Find position of first (least significant) set bit (starting at 1) | ||
95 | 12 | static inline unsigned int u32_ffs(uint32_t n) | |
96 | { | ||
97 | 12 | USE_BUILTIN(ffs, n); | |
98 | return n ? u32_ctz(n) + 1 : 0; | ||
99 | } | ||
100 | |||
101 | // Extract (isolate) least significant set bit | ||
102 | 83 | static inline uint32_t u32_lsbit(uint32_t x) | |
103 | { | ||
104 | 83 | return x & -x; | |
105 | } | ||
106 | |||
107 | // Return the power of 2 greater than or equal to `x`, or 0 if `x` is | ||
108 | // greater than the largest size_t power of 2 | ||
109 | 441 | static inline size_t next_pow2(size_t x) | |
110 | { | ||
111 |
2/2✓ Branch 0 (2→3) taken 440 times.
✓ Branch 1 (2→7) taken 1 times.
|
441 | if (unlikely(x == 0)) { |
112 | return 1; | ||
113 | } | ||
114 | 440 | x--; | |
115 | UNROLL_LOOP(8) | ||
116 |
2/2✓ Branch 0 (5→4) taken 2640 times.
✓ Branch 1 (5→6) taken 440 times.
|
3080 | for (size_t i = 1, n = BITSIZE(x); i < n; i <<= 1) { |
117 | 2640 | x |= x >> i; | |
118 | } | ||
119 | 440 | return x + 1; | |
120 | } | ||
121 | |||
122 | // Count leading zeros | ||
123 | 9 | static inline unsigned int u64_clz(uint64_t x) | |
124 | { | ||
125 | 9 | BUG_ON(x == 0); | |
126 | 9 | USE_STDBIT(stdc_leading_zeros, x); | |
127 | USE_BUILTIN(clz, x); | ||
128 | x |= x >> 1; | ||
129 | x |= x >> 2; | ||
130 | x |= x >> 4; | ||
131 | x |= x >> 8; | ||
132 | x |= x >> 16; | ||
133 | x |= x >> 32; | ||
134 | return u64_popcount(~x); | ||
135 | } | ||
136 | |||
137 | // Round x up to a multiple of r (which *must* be a power of 2) | ||
138 | 30187 | static inline size_t next_multiple(size_t x, size_t r) | |
139 | DIAGNOSE_IF(!IS_POWER_OF_2(r)) | ||
140 | { | ||
141 | 30187 | r--; | |
142 | 30187 | return (x + r) & ~r; | |
143 | } | ||
144 | |||
145 | // Calculate the number of significant bits in `x` (the minimum number | ||
146 | // of base 2 digits needed to represent it). Note that this returns 0 | ||
147 | // for x=0, but see umax_count_base16_digits() (below) for an example | ||
148 | // of how that can be special cased. | ||
149 | 111 | static inline unsigned int umax_bitwidth(uintmax_t x) | |
150 | { | ||
151 |
2/2✓ Branch 0 (2→3) taken 104 times.
✓ Branch 1 (2→4) taken 7 times.
|
111 | USE_STDBIT(stdc_bit_width, x); |
152 | |||
153 | #if HAS_BUILTIN(__builtin_stdc_bit_width) | ||
154 | return __builtin_stdc_bit_width(x); | ||
155 | #endif | ||
156 | |||
157 | if (BITSIZE(x) == 64) { | ||
158 | return x ? 64u - u64_clz(x) : 0; | ||
159 | } | ||
160 | |||
161 | unsigned int width = 0; | ||
162 | while (x) { | ||
163 | x >>= 1; | ||
164 | width++; | ||
165 | } | ||
166 | return width; | ||
167 | } | ||
168 | |||
169 | // Calculate the number of hexadecimal digits (0-F) needed to represent | ||
170 | // `x` as a string (which is 1 in the case of x=0) | ||
171 | 37 | static inline size_t umax_count_base16_digits(uintmax_t x) | |
172 | { | ||
173 | 37 | unsigned int bit_width = umax_bitwidth(x); | |
174 | |||
175 | // `bit_width` can only be 0 when `x` is 0, so this simply ensures | ||
176 | // 1 is returned in that case | ||
177 | 37 | BUG_ON(!bit_width != !x); // Improves code gen | |
178 | 37 | unsigned int base2_digits = bit_width + !x; | |
179 | |||
180 | // This is equivalent to `(base2_digits >> 2) + !!(base2_digits & 3)`, | ||
181 | // but GCC seems to generate slightly better code when done this way | ||
182 | 37 | return next_multiple(base2_digits, 4) / 4; | |
183 | } | ||
184 | |||
185 | #endif | ||
186 |