1 | // |
2 | // Copyright 2017 The Abseil Authors. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // https://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | // |
16 | // ----------------------------------------------------------------------------- |
17 | // File: optimization.h |
18 | // ----------------------------------------------------------------------------- |
19 | // |
20 | // This header file defines portable macros for performance optimization. |
21 | |
22 | #ifndef ABSL_BASE_OPTIMIZATION_H_ |
23 | #define ABSL_BASE_OPTIMIZATION_H_ |
24 | |
#include <assert.h>
#include <stdlib.h>

#ifdef __cplusplus
// Included for std::unreachable() and its __cpp_lib_unreachable feature macro.
#include <utility>
#endif  // __cplusplus

#include "absl/base/config.h"
#include "absl/base/options.h"
29 | |
30 | // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION |
31 | // |
32 | // Instructs the compiler to avoid optimizing tail-call recursion. This macro is |
33 | // useful when you wish to preserve the existing function order within a stack |
34 | // trace for logging, debugging, or profiling purposes. |
35 | // |
36 | // Example: |
37 | // |
38 | // int f() { |
39 | // int result = g(); |
40 | // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); |
41 | // return result; |
42 | // } |
// Compiler dispatch. `__pnacl__` takes precedence over every other check, so
// a PNaCl toolchain always gets the portable fallback at the bottom.
#if !defined(__pnacl__) && (defined(__clang__) || defined(__GNUC__))
// Neither Clang nor GCC will tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif !defined(__pnacl__) && defined(_MSC_VER)
#include <intrin.h>
// The __nop() intrinsic blocks the optimisation.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop()
#else
// PNaCl and unrecognized compilers: a never-taken branch on a volatile local.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#endif
58 | |
59 | // ABSL_CACHELINE_SIZE |
60 | // |
// Explicitly defines the size of an L1 cache line for purposes of alignment.
62 | // Setting the cacheline size allows you to specify that certain objects be |
63 | // aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED` declarations. |
64 | // (See below.) |
65 | // |
66 | // NOTE: this macro should be replaced with the following C++17 features, when |
67 | // those are generally available: |
68 | // |
69 | // * `std::hardware_constructive_interference_size` |
70 | // * `std::hardware_destructive_interference_size` |
71 | // |
72 | // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html |
73 | // for more information. |
#ifdef __GNUC__
// Per-architecture cache line sizes. These are only consulted on
// GNU-compatible compilers; any target not matched below is left to the
// default that follows.
#if defined(__i386__) || defined(__x86_64__)
#define ABSL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define ABSL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
// Finding the actual L1 dcache line size would require reading the special
// register ctr_el0. This value is a good estimate based on a real aarch64
// machine.
#define ABSL_CACHELINE_SIZE 64
#elif defined(__arm__) && defined(__ARM_ARCH_5T__)
// Cache line sizes for 32-bit ARM are not strictly correct: they depend on
// the implementation rather than the architecture, and some implementations
// even make the cache line size configurable at boot time.
#define ABSL_CACHELINE_SIZE 32
#elif defined(__arm__) && defined(__ARM_ARCH_7A__)
#define ABSL_CACHELINE_SIZE 64
#endif
#endif  // __GNUC__

#if !defined(ABSL_CACHELINE_SIZE)
// Nothing above matched, so fall back to a reasonable guess. Overestimates
// tend to waste more space, while underestimates tend to waste more time.
#define ABSL_CACHELINE_SIZE 64
#endif
102 | |
103 | // ABSL_CACHELINE_ALIGNED |
104 | // |
105 | // Indicates that the declared object be cache aligned using |
106 | // `ABSL_CACHELINE_SIZE` (see above). Cacheline aligning objects allows you to |
107 | // load a set of related objects in the L1 cache for performance improvements. |
108 | // Cacheline aligning objects properly allows constructive memory sharing and |
109 | // prevents destructive (or "false") memory sharing. |
110 | // |
111 | // NOTE: callers should replace uses of this macro with `alignas()` using |
112 | // `std::hardware_constructive_interference_size` and/or |
113 | // `std::hardware_destructive_interference_size` when C++17 becomes available to |
114 | // them. |
115 | // |
116 | // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html |
117 | // for more information. |
118 | // |
119 | // On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__` |
120 | // or `__declspec` attribute. For compilers where this is not known to work, |
121 | // the macro expands to nothing. |
122 | // |
123 | // No further guarantees are made here. The result of applying the macro |
124 | // to variables and types is always implementation-defined. |
125 | // |
126 | // WARNING: It is easy to use this attribute incorrectly, even to the point |
127 | // of causing bugs that are difficult to diagnose, crash, etc. It does not |
128 | // of itself guarantee that objects are aligned to a cache line. |
129 | // |
130 | // NOTE: Some compilers are picky about the locations of annotations such as |
131 | // this attribute, so prefer to put it at the beginning of your declaration. |
132 | // For example, |
133 | // |
134 | // ABSL_CACHELINE_ALIGNED static Foo* foo = ... |
135 | // |
136 | // class ABSL_CACHELINE_ALIGNED Bar { ... |
137 | // |
138 | // Recommendations: |
139 | // |
140 | // 1) Consult compiler documentation; this comment is not kept in sync as |
141 | // toolchains evolve. |
142 | // 2) Verify your use has the intended effect. This often requires inspecting |
143 | // the generated machine code. |
144 | // 3) Prefer applying this attribute to individual variables. Avoid |
145 | // applying it to types. This tends to localize the effect. |
#if !defined(__clang__) && !defined(__GNUC__) && !defined(_MSC_VER)
// No alignment annotation is known to work here, so the macro is a no-op.
#define ABSL_CACHELINE_ALIGNED
#elif defined(__clang__) || defined(__GNUC__)
// GNU-style attribute syntax.
#define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE)))
#else
// Only _MSC_VER remains at this point: use the `__declspec` syntax.
#define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE))
#endif
153 | |
154 | // ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE |
155 | // |
156 | // Enables the compiler to prioritize compilation using static analysis for |
157 | // likely paths within a boolean branch. |
158 | // |
159 | // Example: |
160 | // |
161 | // if (ABSL_PREDICT_TRUE(expression)) { |
162 | // return result; // Faster if more likely |
163 | // } else { |
164 | // return 0; |
165 | // } |
166 | // |
// Compilers can use the information that a certain branch is not likely to be
// taken (for instance, a CHECK failure) to optimize for the common case in
// the absence of better information (e.g., profile data gathered by building
// with gcc's `-fprofile-arcs`).
170 | // |
171 | // Recommendation: Modern CPUs dynamically predict branch execution paths, |
172 | // typically with accuracy greater than 97%. As a result, annotating every |
173 | // branch in a codebase is likely counterproductive; however, annotating |
174 | // specific branches that are both hot and consistently mispredicted is likely |
175 | // to yield performance improvements. |
176 | #if ABSL_HAVE_BUILTIN(__builtin_expect) || \ |
177 | (defined(__GNUC__) && !defined(__clang__)) |
178 | #define ABSL_PREDICT_FALSE(x) (__builtin_expect(false || (x), false)) |
179 | #define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) |
180 | #else |
181 | #define ABSL_PREDICT_FALSE(x) (x) |
182 | #define ABSL_PREDICT_TRUE(x) (x) |
183 | #endif |
184 | |
185 | // `ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL()` aborts the program in the fastest |
186 | // possible way, with no attempt at logging. One use is to implement hardening |
187 | // aborts with ABSL_OPTION_HARDENED. Since this is an internal symbol, it |
188 | // should not be used directly outside of Abseil. |
189 | #if ABSL_HAVE_BUILTIN(__builtin_trap) || \ |
190 | (defined(__GNUC__) && !defined(__clang__)) |
191 | #define ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL() __builtin_trap() |
192 | #else |
193 | #define ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL() abort() |
194 | #endif |
195 | |
196 | // `ABSL_INTERNAL_UNREACHABLE_IMPL()` is the platform specific directive to |
197 | // indicate that a statement is unreachable, and to allow the compiler to |
198 | // optimize accordingly. Clients should use `ABSL_UNREACHABLE()`, which is |
199 | // defined below. |
200 | #if defined(__cpp_lib_unreachable) && __cpp_lib_unreachable >= 202202L |
201 | #define ABSL_INTERNAL_UNREACHABLE_IMPL() std::unreachable() |
202 | #elif defined(__GNUC__) || ABSL_HAVE_BUILTIN(__builtin_unreachable) |
203 | #define ABSL_INTERNAL_UNREACHABLE_IMPL() __builtin_unreachable() |
204 | #elif ABSL_HAVE_BUILTIN(__builtin_assume) |
205 | #define ABSL_INTERNAL_UNREACHABLE_IMPL() __builtin_assume(false) |
206 | #elif defined(_MSC_VER) |
207 | #define ABSL_INTERNAL_UNREACHABLE_IMPL() __assume(false) |
208 | #else |
209 | #define ABSL_INTERNAL_UNREACHABLE_IMPL() |
210 | #endif |
211 | |
212 | // `ABSL_UNREACHABLE()` is an unreachable statement. A program which reaches |
213 | // one has undefined behavior, and the compiler may optimize accordingly. |
#if ABSL_OPTION_HARDENED != 1 || !defined(NDEBUG)
// Used whenever hardening is not set to 1, and in every build that does not
// define NDEBUG. The assert only fires in debug mode to aid in debugging; when
// NDEBUG is defined, reaching ABSL_UNREACHABLE() is undefined behavior.
#define ABSL_UNREACHABLE()                       \
  do {                                           \
    /* NOLINTNEXTLINE: misc-static-assert */     \
    assert(false && "ABSL_UNREACHABLE reached"); \
    ABSL_INTERNAL_UNREACHABLE_IMPL();            \
  } while (false)
#else
// Hardened build with NDEBUG defined: abort first, so that the dangerous
// undefined behavior is never reached.
#define ABSL_UNREACHABLE()                \
  do {                                    \
    ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL(); \
    ABSL_INTERNAL_UNREACHABLE_IMPL();     \
  } while (false)
#endif
231 | |
232 | // ABSL_ASSUME(cond) |
233 | // |
234 | // Informs the compiler that a condition is always true and that it can assume |
235 | // it to be true for optimization purposes. |
236 | // |
237 | // WARNING: If the condition is false, the program can produce undefined and |
238 | // potentially dangerous behavior. |
239 | // |
240 | // In !NDEBUG mode, the condition is checked with an assert(). |
241 | // |
242 | // NOTE: The expression must not have side effects, as it may only be evaluated |
243 | // in some compilation modes and not others. Some compilers may issue a warning |
244 | // if the compiler cannot prove the expression has no side effects. For example, |
245 | // the expression should not use a function call since the compiler cannot prove |
246 | // that a function call does not have side effects. |
247 | // |
248 | // Example: |
249 | // |
250 | // int x = ...; |
251 | // ABSL_ASSUME(x >= 0); |
252 | // // The compiler can optimize the division to a simple right shift using the |
253 | // // assumption specified above. |
254 | // int y = x / 16; |
255 | // |
#ifndef NDEBUG
// Builds without NDEBUG check the condition instead of assuming it.
#define ABSL_ASSUME(cond) assert(cond)
#elif ABSL_HAVE_BUILTIN(__builtin_assume)
#define ABSL_ASSUME(cond) __builtin_assume(cond)
#elif defined(_MSC_VER)
#define ABSL_ASSUME(cond) __assume(cond)
#elif defined(__cpp_lib_unreachable) && 202202L <= __cpp_lib_unreachable
// No assume primitive: express the assumption by marking the failing path as
// unreachable.
#define ABSL_ASSUME(cond) \
  do {                    \
    if (!(cond)) {        \
      std::unreachable(); \
    }                     \
  } while (false)
#elif ABSL_HAVE_BUILTIN(__builtin_unreachable) || defined(__GNUC__)
#define ABSL_ASSUME(cond)      \
  do {                         \
    if (!(cond)) {             \
      __builtin_unreachable(); \
    }                          \
  } while (false)
#else
// Nothing usable is available. `false &&` short-circuits, so `cond` is still
// compiled but never evaluated.
#define ABSL_ASSUME(cond)               \
  do {                                  \
    static_cast<void>(false && (cond)); \
  } while (false)
#endif
278 | |
// ABSL_INTERNAL_UNIQUE_SMALL_NAME()
//
// This macro forces a small, unique name onto symbols that are local to a
// file, such as static local variables or static functions. It is intended to
// be used in macro definitions to reduce the cost of the generated code. Do
// NOT use it on symbols exported from a translation unit, since it may cause a
// link-time conflict.
285 | // |
286 | // Example: |
287 | // |
288 | // #define MY_MACRO(txt) |
289 | // namespace { |
290 | // char VeryVeryLongVarName[] ABSL_INTERNAL_UNIQUE_SMALL_NAME() = txt; |
291 | // const char* VeryVeryLongFuncName() ABSL_INTERNAL_UNIQUE_SMALL_NAME(); |
292 | // const char* VeryVeryLongFuncName() { return txt; } |
293 | // } |
294 | // |
295 | |
#if !defined(__GNUC__)
// Not a GNU-compatible compiler: the macro expands to nothing.
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME()
#else
// Two levels of expansion are needed so that __COUNTER__ is replaced by its
// value before the `#` operator turns the token sequence into a string.
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME2(token) #token
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME1(token) \
  ABSL_INTERNAL_UNIQUE_SMALL_NAME2(token)
// Attaches an asm label of the form ".absl.<counter>" to the declaration.
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME() \
  asm(ABSL_INTERNAL_UNIQUE_SMALL_NAME1(.absl.__COUNTER__))
#endif
304 | |
305 | #endif // ABSL_BASE_OPTIMIZATION_H_ |
306 | |