Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/ScaledNumber.h
Warning: line 790, column 14
The result of the left shift is undefined due to shifting by '16383', which is greater than or equal to the width of type 'unsigned long long'
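For context, a minimal stand-alone illustration of the rule the analyzer is enforcing (not LLVM code): shifting an N-bit integer by N or more bits is undefined behaviour in C++. Notably, 16383 is ScaledNumbers::MaxScale in the header below, which suggests a saturated scale value reaching a left shift on a 64-bit type.

    // Hypothetical example; a shift like 'v << 16383' is what the analyzer flags.
    #include <cstdint>
    uint64_t shift_ok(uint64_t v)  { return v << 63; }    // defined: 63 < 64
    uint64_t shift_bad(uint64_t v) { return v << 16383; } // UB: 16383 >= 64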

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name BlockFrequencyInfoImpl.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp

1//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/GraphTraits.h"
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/SCCIterator.h"
19#include "llvm/Config/llvm-config.h"
20#include "llvm/IR/Function.h"
21#include "llvm/Support/BlockFrequency.h"
22#include "llvm/Support/BranchProbability.h"
23#include "llvm/Support/Compiler.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/ScaledNumber.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32#include <iterator>
33#include <list>
34#include <numeric>
35#include <utility>
36#include <vector>
37
38using namespace llvm;
39using namespace llvm::bfi_detail;
40
41#define DEBUG_TYPE "block-freq"
42
43namespace llvm {
44cl::opt<bool> CheckBFIUnknownBlockQueries(
45 "check-bfi-unknown-block-queries",
46 cl::init(false), cl::Hidden,
47 cl::desc("Check if block frequency is queried for an unknown block "
48 "for debugging missed BFI updates"));
49
50cl::opt<bool> UseIterativeBFIInference(
51 "use-iterative-bfi-inference", cl::init(false), cl::Hidden, cl::ZeroOrMore,
52 cl::desc("Apply an iterative post-processing to infer correct BFI counts"));
53
54cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock(
55 "iterative-bfi-max-iterations-per-block", cl::init(1000), cl::Hidden,
56 cl::desc("Iterative inference: maximum number of update iterations "
57 "per block"));
58
59cl::opt<double> IterativeBFIPrecision(
60 "iterative-bfi-precision", cl::init(1e-12), cl::Hidden,
61 cl::desc("Iterative inference: delta convergence precision; smaller values "
62 "typically lead to better results at the cost of worsen runtime"));
63}
64
65ScaledNumber<uint64_t> BlockMass::toScaled() const {
66 if (isFull())
67 return ScaledNumber<uint64_t>(1, 0);
68 return ScaledNumber<uint64_t>(getMass() + 1, -64);
69}
70
71#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
72LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
73#endif
74
75static char getHexDigit(int N) {
76 assert(N < 16);
77 if (N < 10)
78 return '0' + N;
79 return 'a' + N - 10;
80}
81
82raw_ostream &BlockMass::print(raw_ostream &OS) const {
83 for (int Digits = 0; Digits < 16; ++Digits)
84 OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
85 return OS;
86}
87
88namespace {
89
90using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
91using Distribution = BlockFrequencyInfoImplBase::Distribution;
92using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
93using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
94using LoopData = BlockFrequencyInfoImplBase::LoopData;
95using Weight = BlockFrequencyInfoImplBase::Weight;
96using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
97
98/// Dithering mass distributer.
99///
100/// This class splits up a single mass into portions by weight, dithering to
101/// spread out error. No mass is lost. The dithering precision depends on the
102/// precision of the product of \a BlockMass and \a BranchProbability.
103///
104/// The distribution algorithm follows.
105///
106/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
107/// mass to distribute in \a RemMass.
108///
109/// 2. For each portion:
110///
111/// 1. Construct a branch probability, P, as the portion's weight divided
112/// by the current value of \a RemWeight.
113/// 2. Calculate the portion's mass as \a RemMass times P.
114/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
115/// the current portion's weight and mass.
116struct DitheringDistributer {
117 uint32_t RemWeight;
118 BlockMass RemMass;
119
120 DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
121
122 BlockMass takeMass(uint32_t Weight);
123};
124
125} // end anonymous namespace
126
127DitheringDistributer::DitheringDistributer(Distribution &Dist,
128 const BlockMass &Mass) {
129 Dist.normalize();
130 RemWeight = Dist.Total;
131 RemMass = Mass;
132}
133
134BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
135 assert(Weight && "invalid weight");
136 assert(Weight <= RemWeight);
137 BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
138
139 // Decrement totals (dither).
140 RemWeight -= Weight;
141 RemMass -= Mass;
142 return Mass;
143}
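To see why no mass is lost, here is a minimal stand-alone sketch of the dithering idea (hypothetical names, plain integers standing in for the BlockMass/BranchProbability types): each portion is recomputed from the remaining totals, so rounding error is spread out rather than accumulated, and the final portion takes exactly what is left.

    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> distribute(uint64_t Mass,
                                     const std::vector<uint32_t> &Weights) {
      uint64_t RemWeight = 0;
      for (uint32_t W : Weights)
        RemWeight += W;
      uint64_t RemMass = Mass;
      std::vector<uint64_t> Portions;
      for (uint32_t W : Weights) {
        // Portion ~= RemMass * W / RemWeight, taken against the *remaining*
        // totals (128-bit intermediate avoids overflow; clang/gcc extension).
        uint64_t Taken = (uint64_t)((__uint128_t)RemMass * W / RemWeight);
        Portions.push_back(Taken);
        RemWeight -= W;
        RemMass -= Taken;
      }
      // For the last weight, W == RemWeight, so Taken == RemMass exactly:
      // the portions always sum to Mass.
      return Portions;
    }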
144
145void Distribution::add(const BlockNode &Node, uint64_t Amount,
146 Weight::DistType Type) {
147 assert(Amount && "invalid weight of 0");
148 uint64_t NewTotal = Total + Amount;
149
150 // Check for overflow. It should be impossible to overflow twice.
151 bool IsOverflow = NewTotal < Total;
152 assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
153 DidOverflow |= IsOverflow;
154
155 // Update the total.
156 Total = NewTotal;
157
158 // Save the weight.
159 Weights.push_back(Weight(Type, Node, Amount));
160}
161
162static void combineWeight(Weight &W, const Weight &OtherW) {
163 assert(OtherW.TargetNode.isValid());
164 if (!W.Amount) {
165 W = OtherW;
166 return;
167 }
168 assert(W.Type == OtherW.Type);
169 assert(W.TargetNode == OtherW.TargetNode);
170 assert(OtherW.Amount && "Expected non-zero weight");
171 if (W.Amount > W.Amount + OtherW.Amount)
172 // Saturate on overflow.
173 W.Amount = UINT64_MAX;
174 else
175 W.Amount += OtherW.Amount;
176}
177
178static void combineWeightsBySorting(WeightList &Weights) {
179 // Sort so edges to the same node are adjacent.
180 llvm::sort(Weights, [](const Weight &L, const Weight &R) {
181 return L.TargetNode < R.TargetNode;
182 });
183
184 // Combine adjacent edges.
185 WeightList::iterator O = Weights.begin();
186 for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
187 ++O, (I = L)) {
188 *O = *I;
189
190 // Find the adjacent weights to the same node.
191 for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
192 combineWeight(*O, *L);
193 }
194
195 // Erase extra entries.
196 Weights.erase(O, Weights.end());
197}
198
199static void combineWeightsByHashing(WeightList &Weights) {
200 // Collect weights into a DenseMap.
201 using HashTable = DenseMap<BlockNode::IndexType, Weight>;
202
203 HashTable Combined(NextPowerOf2(2 * Weights.size()));
204 for (const Weight &W : Weights)
205 combineWeight(Combined[W.TargetNode.Index], W);
206
207 // Check whether anything changed.
208 if (Weights.size() == Combined.size())
209 return;
210
211 // Fill in the new weights.
212 Weights.clear();
213 Weights.reserve(Combined.size());
214 for (const auto &I : Combined)
215 Weights.push_back(I.second);
216}
217
218static void combineWeights(WeightList &Weights) {
219 // Use a hash table for many successors to keep this linear.
220 if (Weights.size() > 128) {
221 combineWeightsByHashing(Weights);
222 return;
223 }
224
225 combineWeightsBySorting(Weights);
226}
227
228static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
229 assert(Shift >= 0);
230 assert(Shift < 64);
231 if (!Shift)
232 return N;
233 return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
234}
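A few illustrative checks of the rounding behaviour (assuming the function above; it rounds half up based on the highest bit shifted out):

    #include <cassert>
    void shiftRoundExamples() {
      assert(shiftRightAndRound(11, 2) == 3); // 11/4 = 2.75 -> 3
      assert(shiftRightAndRound(10, 2) == 3); // 10/4 = 2.5  -> 3 (half rounds up)
      assert(shiftRightAndRound(9, 2) == 2);  //  9/4 = 2.25 -> 2
    }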
235
236void Distribution::normalize() {
237 // Early exit for termination nodes.
238 if (Weights.empty())
239 return;
240
241 // Only bother if there are multiple successors.
242 if (Weights.size() > 1)
243 combineWeights(Weights);
244
245 // Early exit when combined into a single successor.
246 if (Weights.size() == 1) {
247 Total = 1;
248 Weights.front().Amount = 1;
249 return;
250 }
251
252 // Determine how much to shift right so that the total fits into 32-bits.
253 //
254 // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
255 // for each weight can cause a 32-bit overflow.
256 int Shift = 0;
257 if (DidOverflow)
258 Shift = 33;
259 else if (Total > UINT32_MAX)
260 Shift = 33 - countLeadingZeros(Total);
261
262 // Early exit if nothing needs to be scaled.
263 if (!Shift) {
264 // If we didn't overflow then combineWeights() shouldn't have changed the
265 // sum of the weights, but let's double-check.
266 assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
267 [](uint64_t Sum, const Weight &W) {
268 return Sum + W.Amount;
269 }) &&
270 "Expected total to be correct");
271 return;
272 }
273
274 // Recompute the total through accumulation (rather than shifting it) so that
275 // it's accurate after shifting and any changes combineWeights() made above.
276 Total = 0;
277
278 // Sum the weights to each node and shift right if necessary.
279 for (Weight &W : Weights) {
280 // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
281 // can round here without concern about overflow.
282 assert(W.TargetNode.isValid());
283 W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
284 assert(W.Amount <= UINT32_MAX);
285
286 // Update the total.
287 Total += W.Amount;
288 }
289 assert(Total <= UINT32_MAX);
290}
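A quick sanity check of the shift choice (illustrative; the compiler builtin stands in for LLVM's countLeadingZeros): for a total needing more than 32 bits, the computed shift lands it below UINT32_MAX with a spare bit for the per-weight minimum of 1.

    #include <cassert>
    #include <cstdint>
    void shiftChoiceExample() {
      uint64_t Total = uint64_t(1) << 40;      // needs 41 bits
      int Shift = 33 - __builtin_clzll(Total); // 33 - 23 = 10
      assert((Total >> Shift) == (uint64_t(1) << 30)); // now fits in 32 bits
    }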
291
292void BlockFrequencyInfoImplBase::clear() {
293 // Swap with a default-constructed std::vector, since std::vector<>::clear()
294 // does not actually clear heap storage.
295 std::vector<FrequencyData>().swap(Freqs);
296 IsIrrLoopHeader.clear();
297 std::vector<WorkingData>().swap(Working);
298 Loops.clear();
299}
300
301/// Clear all memory not needed downstream.
302///
303/// Releases all memory not used downstream. In particular, saves Freqs.
304static void cleanup(BlockFrequencyInfoImplBase &BFI) {
305 std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
306 SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
307 BFI.clear();
308 BFI.Freqs = std::move(SavedFreqs);
309 BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
310}
311
312bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
313 const LoopData *OuterLoop,
314 const BlockNode &Pred,
315 const BlockNode &Succ,
316 uint64_t Weight) {
317 if (!Weight)
318 Weight = 1;
319
320 auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
321 return OuterLoop && OuterLoop->isHeader(Node);
322 };
323
324 BlockNode Resolved = Working[Succ.Index].getResolvedNode();
325
326#ifndef NDEBUG
327 auto debugSuccessor = [&](const char *Type) {
328 dbgs() << " =>"
329 << " [" << Type << "] weight = " << Weight;
330 if (!isLoopHeader(Resolved))
331 dbgs() << ", succ = " << getBlockName(Succ);
332 if (Resolved != Succ)
333 dbgs() << ", resolved = " << getBlockName(Resolved);
334 dbgs() << "\n";
335 };
336 (void)debugSuccessor;
337#endif
338
339 if (isLoopHeader(Resolved)) {
340 LLVM_DEBUG(debugSuccessor("backedge"));
341 Dist.addBackedge(Resolved, Weight);
342 return true;
343 }
344
345 if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
346 LLVM_DEBUG(debugSuccessor(" exit "));
347 Dist.addExit(Resolved, Weight);
348 return true;
349 }
350
351 if (Resolved < Pred) {
352 if (!isLoopHeader(Pred)) {
353 // If OuterLoop is an irreducible loop, we can't actually handle this.
354 assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
355 "unhandled irreducible control flow");
356
357 // Irreducible backedge. Abort.
358 LLVM_DEBUG(debugSuccessor("abort!!!"));
359 return false;
360 }
361
362 // If "Pred" is a loop header, then this isn't really a backedge; rather,
363 // OuterLoop must be irreducible. These false backedges can come only from
364 // secondary loop headers.
365 assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
366 "unhandled irreducible control flow");
367 }
368
369 LLVM_DEBUG(debugSuccessor(" local "));
370 Dist.addLocal(Resolved, Weight);
371 return true;
372}
373
374bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
375 const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
376 // Copy the exit map into Dist.
377 for (const auto &I : Loop.Exits)
378 if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
379 I.second.getMass()))
380 // Irreducible backedge.
381 return false;
382
383 return true;
384}
385
386/// Compute the loop scale for a loop.
387void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
388 // Compute loop scale.
389 LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
390
391 // Infinite loops need special handling. If we give the back edge an infinite
392 // mass, they may saturate all the other scales in the function down to 1,
393 // making all the other region temperatures look exactly the same. Choose an
394 // arbitrary scale to avoid these issues.
395 //
396 // FIXME: An alternate way would be to select a symbolic scale which is later
397 // replaced to be the maximum of all computed scales plus 1. This would
398 // appropriately describe the loop as having a large scale, without skewing
399 // the final frequency computation.
400 const Scaled64 InfiniteLoopScale(1, 12);
401
402 // LoopScale == 1 / ExitMass
403 // ExitMass == HeadMass - BackedgeMass
404 BlockMass TotalBackedgeMass;
405 for (auto &Mass : Loop.BackedgeMass)
406 TotalBackedgeMass += Mass;
407 BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
408
409 // Block scale stores the inverse of the scale. If this is an infinite loop,
410 // its exit mass will be zero. In this case, use an arbitrary scale for the
411 // loop scale.
412 Loop.Scale =
413 ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
414
415 LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
416 << BlockMass::getFull() << " - " << TotalBackedgeMass
417 << ")\n"
418 << " - scale = " << Loop.Scale << "\n");
419}
420
421/// Package up a loop.
422void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
423 LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
424
425 // Clear the subloop exits to prevent quadratic memory usage.
426 for (const BlockNode &M : Loop.Nodes) {
427 if (auto *Loop = Working[M.Index].getPackagedLoop())
428 Loop->Exits.clear();
429 LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
430 }
431 Loop.IsPackaged = true;
432}
433
434#ifndef NDEBUG
435static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
436 const DitheringDistributer &D, const BlockNode &T,
437 const BlockMass &M, const char *Desc) {
438 dbgs() << " => assign " << M << " (" << D.RemMass << ")";
439 if (Desc)
440 dbgs() << " [" << Desc << "]";
441 if (T.isValid())
442 dbgs() << " to " << BFI.getBlockName(T);
443 dbgs() << "\n";
444}
445#endif
446
447void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
448 LoopData *OuterLoop,
449 Distribution &Dist) {
450 BlockMass Mass = Working[Source.Index].getMass();
451 LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
452
453 // Distribute mass to successors as laid out in Dist.
454 DitheringDistributer D(Dist, Mass);
455
456 for (const Weight &W : Dist.Weights) {
457 // Check for a local edge (non-backedge and non-exit).
458 BlockMass Taken = D.takeMass(W.Amount);
459 if (W.Type == Weight::Local) {
460 Working[W.TargetNode.Index].getMass() += Taken;
461 LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
462 continue;
463 }
464
465 // Backedges and exits only make sense if we're processing a loop.
466 assert(OuterLoop && "backedge or exit outside of loop");
467
468 // Check for a backedge.
469 if (W.Type == Weight::Backedge) {
470 OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
471 LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
472 continue;
473 }
474
475 // This must be an exit.
476 assert(W.Type == Weight::Exit);
477 OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
478 LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
479 }
480}
481
482static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
483 const Scaled64 &Min, const Scaled64 &Max) {
484 // Scale the Factor to a size that creates integers. Ideally, integers would
485 // be scaled so that Max == UINT64_MAX so that they can be best
486 // differentiated. However, in the presence of large frequency values, small
487 // frequencies are scaled down to 1, making it impossible to differentiate
488 // small, unequal numbers. When the spread between Min and Max frequencies
489 // fits well within MaxBits, we make the scale be at least 8.
490 const unsigned MaxBits = 64;
491 const unsigned SpreadBits = (Max / Min).lg();
492 Scaled64 ScalingFactor;
493 if (SpreadBits <= MaxBits - 3) {
4. Taking false branch
494 // If the values are small enough, make the scaling factor at least 8 to
495 // allow distinguishing small values.
496 ScalingFactor = Min.inverse();
497 ScalingFactor <<= 3;
498 } else {
499 // If the values need more than MaxBits to be represented, saturate small
500 // frequency values down to 1 by using a scaling factor that benefits large
501 // frequency values.
502 ScalingFactor = Scaled64(1, MaxBits) / Max;
503 }
504
505 // Translate the floats to integers.
506 LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
5. Loop condition is false. Exiting loop
507 << ", factor = " << ScalingFactor << "\n");
508 for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
6. Assuming the condition is true
7. Loop condition is true. Entering loop body
509 Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
8. Calling 'operator*<unsigned long long>'
10. Returning from 'operator*<unsigned long long>'
510 BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
11. Calling 'ScaledNumber::toInt'
511 LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
512 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
513 << ", int = " << BFI.Freqs[Index].Integer << "\n");
514 }
515}
516
517/// Unwrap a loop package.
518///
519/// Visits all the members of a loop, adjusting their BlockData according to
520/// the loop's pseudo-node.
521static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
522 LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
523 << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
524 << "\n");
525 Loop.Scale *= Loop.Mass.toScaled();
526 Loop.IsPackaged = false;
527 LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
528
529 // Propagate the head scale through the loop. Since members are visited in
530 // RPO, the head scale will be updated by the loop scale first, and then the
531 // final head scale will be used to update the rest of the members.
532 for (const BlockNode &N : Loop.Nodes) {
533 const auto &Working = BFI.Working[N.Index];
534 Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
535 : BFI.Freqs[N.Index].Scaled;
536 Scaled64 New = Loop.Scale * F;
537 LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
538 << New << "\n");
539 F = New;
540 }
541}
542
543void BlockFrequencyInfoImplBase::unwrapLoops() {
544 // Set initial frequencies from loop-local masses.
545 for (size_t Index = 0; Index < Working.size(); ++Index)
546 Freqs[Index].Scaled = Working[Index].Mass.toScaled();
547
548 for (LoopData &Loop : Loops)
549 unwrapLoop(*this, Loop);
550}
551
552void BlockFrequencyInfoImplBase::finalizeMetrics() {
553 // Unwrap loop packages in reverse post-order, tracking min and max
554 // frequencies.
555 auto Min = Scaled64::getLargest();
556 auto Max = Scaled64::getZero();
557 for (size_t Index = 0; Index < Working.size(); ++Index) {
1. Assuming the condition is false
2. Loop condition is false. Execution continues on line 564
558 // Update min/max scale.
559 Min = std::min(Min, Freqs[Index].Scaled);
560 Max = std::max(Max, Freqs[Index].Scaled);
561 }
562
563 // Convert to integers.
564 convertFloatingToInteger(*this, Min, Max);
3. Calling 'convertFloatingToInteger'
565
566 // Clean up data structures.
567 cleanup(*this);
568
569 // Print out the final stats.
570 LLVM_DEBUG(dump());
571}
572
573BlockFrequency
574BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
575 if (!Node.isValid()) {
576#ifndef NDEBUG
577 if (CheckBFIUnknownBlockQueries) {
578 SmallString<256> Msg;
579 raw_svector_ostream OS(Msg);
580 OS << "*** Detected BFI query for unknown block " << getBlockName(Node);
581 report_fatal_error(OS.str());
582 }
583#endif
584 return 0;
585 }
586 return Freqs[Node.Index].Integer;
587}
588
589Optional<uint64_t>
590BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
591 const BlockNode &Node,
592 bool AllowSynthetic) const {
593 return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
594 AllowSynthetic);
595}
596
597Optional<uint64_t>
598BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
599 uint64_t Freq,
600 bool AllowSynthetic) const {
601 auto EntryCount = F.getEntryCount(AllowSynthetic);
602 if (!EntryCount)
603 return None;
604 // Use 128 bit APInt to do the arithmetic to avoid overflow.
605 APInt BlockCount(128, EntryCount.getCount());
606 APInt BlockFreq(128, Freq);
607 APInt EntryFreq(128, getEntryFreq());
608 BlockCount *= BlockFreq;
609 // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned,
610 // lshr by 1 gives EntryFreq/2.
611 BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
612 return BlockCount.getLimitedValue();
613}
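The same rounded division in a minimal stand-alone form (hypothetical helper; 128-bit arithmetic plays the role of APInt here, and the real code additionally saturates the result via getLimitedValue()):

    #include <cstdint>
    uint64_t scaledProfileCount(uint64_t EntryCount, uint64_t Freq,
                                uint64_t EntryFreq) {
      __uint128_t Count = (__uint128_t)EntryCount * Freq; // no 128-bit overflow
      // Adding EntryFreq/2 before dividing rounds the quotient to nearest.
      return (uint64_t)((Count + EntryFreq / 2) / EntryFreq);
    }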
614
615bool
616BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
617 if (!Node.isValid())
618 return false;
619 return IsIrrLoopHeader.test(Node.Index);
620}
621
622Scaled64
623BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
624 if (!Node.isValid())
625 return Scaled64::getZero();
626 return Freqs[Node.Index].Scaled;
627}
628
629void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
630 uint64_t Freq) {
631 assert(Node.isValid() && "Expected valid node");
632 assert(Node.Index < Freqs.size() && "Expected legal index");
633 Freqs[Node.Index].Integer = Freq;
634}
635
636std::string
637BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
638 return {};
639}
640
641std::string
642BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
643 return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
644}
645
646raw_ostream &
647BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
648 const BlockNode &Node) const {
649 return OS << getFloatingBlockFreq(Node);
650}
651
652raw_ostream &
653BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
654 const BlockFrequency &Freq) const {
655 Scaled64 Block(Freq.getFrequency(), 0);
656 Scaled64 Entry(getEntryFreq(), 0);
657
658 return OS << Block / Entry;
659}
660
661void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
662 Start = OuterLoop.getHeader();
663 Nodes.reserve(OuterLoop.Nodes.size());
664 for (auto N : OuterLoop.Nodes)
665 addNode(N);
666 indexNodes();
667}
668
669void IrreducibleGraph::addNodesInFunction() {
670 Start = 0;
671 for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
672 if (!BFI.Working[Index].isPackaged())
673 addNode(Index);
674 indexNodes();
675}
676
677void IrreducibleGraph::indexNodes() {
678 for (auto &I : Nodes)
679 Lookup[I.Node.Index] = &I;
680}
681
682void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
683 const BFIBase::LoopData *OuterLoop) {
684 if (OuterLoop && OuterLoop->isHeader(Succ))
685 return;
686 auto L = Lookup.find(Succ.Index);
687 if (L == Lookup.end())
688 return;
689 IrrNode &SuccIrr = *L->second;
690 Irr.Edges.push_back(&SuccIrr);
691 SuccIrr.Edges.push_front(&Irr);
692 ++SuccIrr.NumIn;
693}
694
695namespace llvm {
696
697template <> struct GraphTraits<IrreducibleGraph> {
698 using GraphT = bfi_detail::IrreducibleGraph;
699 using NodeRef = const GraphT::IrrNode *;
700 using ChildIteratorType = GraphT::IrrNode::iterator;
701
702 static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
703 static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
704 static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
705};
706
707} // end namespace llvm
708
709/// Find extra irreducible headers.
710///
711/// Find entry blocks and other blocks with backedges, which exist when \c G
712/// contains irreducible sub-SCCs.
713static void findIrreducibleHeaders(
714 const BlockFrequencyInfoImplBase &BFI,
715 const IrreducibleGraph &G,
716 const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
717 LoopData::NodeList &Headers, LoopData::NodeList &Others) {
718 // Map from each node in the SCC to whether it is an entry block.
719 SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
720
721 // InSCC also acts as the set of nodes in the graph. Seed it.
722 for (const auto *I : SCC)
723 InSCC[I] = false;
724
725 for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
726 auto &Irr = *I->first;
727 for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
728 if (InSCC.count(P))
729 continue;
730
731 // This is an entry block.
732 I->second = true;
733 Headers.push_back(Irr.Node);
734 LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
735 << "\n");
736 break;
737 }
738 }
739 assert(Headers.size() >= 2 &&
740 "Expected irreducible CFG; -loop-info is likely invalid");
741 if (Headers.size() == InSCC.size()) {
742 // Every block is a header.
743 llvm::sort(Headers);
744 return;
745 }
746
747 // Look for extra headers from irreducible sub-SCCs.
748 for (const auto &I : InSCC) {
749 // Entry blocks are already headers.
750 if (I.second)
751 continue;
752
753 auto &Irr = *I.first;
754 for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
755 // Skip forward edges.
756 if (P->Node < Irr.Node)
757 continue;
758
759 // Skip predecessors from entry blocks. These can have inverted
760 // ordering.
761 if (InSCC.lookup(P))
762 continue;
763
764 // Store the extra header.
765 Headers.push_back(Irr.Node);
766 LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
767 << "\n");
768 break;
769 }
770 if (Headers.back() == Irr.Node)
771 // Added this as a header.
772 continue;
773
774 // This is not a header.
775 Others.push_back(Irr.Node);
776 LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
777 }
778 llvm::sort(Headers);
779 llvm::sort(Others);
780}
781
782static void createIrreducibleLoop(
783 BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
784 LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
785 const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
786 // Translate the SCC into RPO.
787 LLVM_DEBUG(dbgs() << " - found-scc\n");
788
789 LoopData::NodeList Headers;
790 LoopData::NodeList Others;
791 findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
792
793 auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
794 Headers.end(), Others.begin(), Others.end());
795
796 // Update loop hierarchy.
797 for (const auto &N : Loop->Nodes)
798 if (BFI.Working[N.Index].isLoopHeader())
799 BFI.Working[N.Index].Loop->Parent = &*Loop;
800 else
801 BFI.Working[N.Index].Loop = &*Loop;
802}
803
804iterator_range<std::list<LoopData>::iterator>
805BlockFrequencyInfoImplBase::analyzeIrreducible(
806 const IrreducibleGraph &G, LoopData *OuterLoop,
807 std::list<LoopData>::iterator Insert) {
808 assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
809 auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
810
811 for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
812 if (I->size() < 2)
813 continue;
814
815 // Translate the SCC into RPO.
816 createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
817 }
818
819 if (OuterLoop)
820 return make_range(std::next(Prev), Insert);
821 return make_range(Loops.begin(), Insert);
822}
823
824void
825BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
826 OuterLoop.Exits.clear();
827 for (auto &Mass : OuterLoop.BackedgeMass)
828 Mass = BlockMass::getEmpty();
829 auto O = OuterLoop.Nodes.begin() + 1;
830 for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
831 if (!Working[I->Index].isPackaged())
832 *O++ = *I;
833 OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
834}
835
836void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
837 assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
838
839 // Since the loop has more than one header block, the mass flowing back into
840 // each header will be different. Adjust the mass in each header loop to
841 // reflect the masses flowing through back edges.
842 //
843 // To do this, we distribute the initial mass using the backedge masses
844 // as weights for the distribution.
845 BlockMass LoopMass = BlockMass::getFull();
846 Distribution Dist;
847
848 LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
849 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
850 auto &HeaderNode = Loop.Nodes[H];
851 auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
852 LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
853 << getBlockName(HeaderNode) << ": " << BackedgeMass
854 << "\n");
855 if (BackedgeMass.getMass() > 0)
856 Dist.addLocal(HeaderNode, BackedgeMass.getMass());
857 else
858 LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
859 }
860
861 DitheringDistributer D(Dist, LoopMass);
862
863 LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
864 << " to headers using above weights\n");
865 for (const Weight &W : Dist.Weights) {
866 BlockMass Taken = D.takeMass(W.Amount);
867 assert(W.Type == Weight::Local && "all weights should be local");
868 Working[W.TargetNode.Index].getMass() = Taken;
869 LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
870 }
871}
872
873void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
874 BlockMass LoopMass = BlockMass::getFull();
875 DitheringDistributer D(Dist, LoopMass);
876 for (const Weight &W : Dist.Weights) {
877 BlockMass Taken = D.takeMass(W.Amount);
878 assert(W.Type == Weight::Local && "all weights should be local");
879 Working[W.TargetNode.Index].getMass() = Taken;
880 LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
881 }
882}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/ScaledNumber.h

1//===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains functions (and a class) useful for working with scaled
10// numbers -- in particular, pairs of integers where one represents digits and
11// another represents a scale. The functions are helpers and live in the
12// namespace ScaledNumbers. The class ScaledNumber is useful for modelling
13// certain cost metrics that need simple, integer-like semantics that are easy
14// to reason about.
15//
16// These might remind you of soft-floats. If you want one of those, you're in
17// the wrong place. Look at include/llvm/ADT/APFloat.h instead.
18//
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_SUPPORT_SCALEDNUMBER_H
22#define LLVM_SUPPORT_SCALEDNUMBER_H
23
24#include "llvm/Support/MathExtras.h"
25#include <algorithm>
26#include <cstdint>
27#include <limits>
28#include <string>
29#include <tuple>
30#include <utility>
31
32namespace llvm {
33namespace ScaledNumbers {
34
35/// Maximum scale; same as APFloat for easy debug printing.
36const int32_t MaxScale = 16383;
37
38/// Minimum scale; same as APFloat for easy debug printing.
39const int32_t MinScale = -16382;
40
41/// Get the width of a number.
42template <class DigitsT> inline int getWidth() { return sizeof(DigitsT) * 8; }
43
44/// Conditionally round up a scaled number.
45///
46/// Given \c Digits and \c Scale, round up iff \c ShouldRound is \c true.
47/// Always returns \c Scale unless there's an overflow, in which case it
48/// returns \c 1+Scale.
49///
50/// \pre adding 1 to \c Scale will not overflow INT16_MAX.
51template <class DigitsT>
52inline std::pair<DigitsT, int16_t> getRounded(DigitsT Digits, int16_t Scale,
53 bool ShouldRound) {
54 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
55
56 if (ShouldRound)
57 if (!++Digits)
58 // Overflow.
59 return std::make_pair(DigitsT(1) << (getWidth<DigitsT>() - 1), Scale + 1);
60 return std::make_pair(Digits, Scale);
61}
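An illustrative overflow case (assuming the header above is available): rounding up an all-ones digit pattern wraps to zero, so getRounded renormalizes to the high bit and bumps the scale.

    #include <cassert>
    #include <cstdint>
    void roundedOverflowExample() {
      auto R = llvm::ScaledNumbers::getRounded<uint32_t>(UINT32_MAX, 0, true);
      assert(R.first == uint32_t(1) << 31); // 0x80000000
      assert(R.second == 1);                // scale incremented by the carry
    }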
62
63/// Convenience helper for 32-bit rounding.
64inline std::pair<uint32_t, int16_t> getRounded32(uint32_t Digits, int16_t Scale,
65 bool ShouldRound) {
66 return getRounded(Digits, Scale, ShouldRound);
67}
68
69/// Convenience helper for 64-bit rounding.
70inline std::pair<uint64_t, int16_t> getRounded64(uint64_t Digits, int16_t Scale,
71 bool ShouldRound) {
72 return getRounded(Digits, Scale, ShouldRound);
73}
74
75/// Adjust a 64-bit scaled number down to the appropriate width.
76///
77/// \pre Adding 64 to \c Scale will not overflow INT16_MAX.
78template <class DigitsT>
79inline std::pair<DigitsT, int16_t> getAdjusted(uint64_t Digits,
80 int16_t Scale = 0) {
81 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
82
83 const int Width = getWidth<DigitsT>();
84 if (Width == 64 || Digits <= std::numeric_limits<DigitsT>::max())
85 return std::make_pair(Digits, Scale);
86
87 // Shift right and round.
88 int Shift = 64 - Width - countLeadingZeros(Digits);
89 return getRounded<DigitsT>(Digits >> Shift, Scale + Shift,
90 Digits & (UINT64_C(1) << (Shift - 1)));
91}
92
93/// Convenience helper for adjusting to 32 bits.
94inline std::pair<uint32_t, int16_t> getAdjusted32(uint64_t Digits,
95 int16_t Scale = 0) {
96 return getAdjusted<uint32_t>(Digits, Scale);
97}
98
99/// Convenience helper for adjusting to 64 bits.
100inline std::pair<uint64_t, int16_t> getAdjusted64(uint64_t Digits,
101 int16_t Scale = 0) {
102 return getAdjusted<uint64_t>(Digits, Scale);
103}
104
105/// Multiply two 64-bit integers to create a 64-bit scaled number.
106///
107/// Implemented with four 64-bit integer multiplies.
108std::pair<uint64_t, int16_t> multiply64(uint64_t LHS, uint64_t RHS);
109
110/// Multiply two 32-bit integers to create a 32-bit scaled number.
111///
112/// Implemented with one 64-bit integer multiply.
113template <class DigitsT>
114inline std::pair<DigitsT, int16_t> getProduct(DigitsT LHS, DigitsT RHS) {
115 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
116
117 if (getWidth<DigitsT>() <= 32 || (LHS <= UINT32_MAX && RHS <= UINT32_MAX))
118 return getAdjusted<DigitsT>(uint64_t(LHS) * RHS);
119
120 return multiply64(LHS, RHS);
121}
122
123/// Convenience helper for 32-bit product.
124inline std::pair<uint32_t, int16_t> getProduct32(uint32_t LHS, uint32_t RHS) {
125 return getProduct(LHS, RHS);
126}
127
128/// Convenience helper for 64-bit product.
129inline std::pair<uint64_t, int16_t> getProduct64(uint64_t LHS, uint64_t RHS) {
130 return getProduct(LHS, RHS);
131}
132
133/// Divide two 64-bit integers to create a 64-bit scaled number.
134///
135/// Implemented with long division.
136///
137/// \pre \c Dividend and \c Divisor are non-zero.
138std::pair<uint64_t, int16_t> divide64(uint64_t Dividend, uint64_t Divisor);
139
140/// Divide two 32-bit integers to create a 32-bit scaled number.
141///
142/// Implemented with one 64-bit integer divide/remainder pair.
143///
144/// \pre \c Dividend and \c Divisor are non-zero.
145std::pair<uint32_t, int16_t> divide32(uint32_t Dividend, uint32_t Divisor);
146
147/// Divide two 32-bit numbers to create a 32-bit scaled number.
148///
149/// Implemented with one 64-bit integer divide/remainder pair.
150///
151/// Returns \c (DigitsT_MAX, MaxScale) for divide-by-zero (0 for 0/0).
152template <class DigitsT>
153std::pair<DigitsT, int16_t> getQuotient(DigitsT Dividend, DigitsT Divisor) {
154 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
155 static_assert(sizeof(DigitsT) == 4 || sizeof(DigitsT) == 8,
156 "expected 32-bit or 64-bit digits");
157
158 // Check for zero.
159 if (!Dividend)
160 return std::make_pair(0, 0);
161 if (!Divisor)
162 return std::make_pair(std::numeric_limits<DigitsT>::max(), MaxScale);
163
164 if (getWidth<DigitsT>() == 64)
165 return divide64(Dividend, Divisor);
166 return divide32(Dividend, Divisor);
167}
168
169/// Convenience helper for 32-bit quotient.
170inline std::pair<uint32_t, int16_t> getQuotient32(uint32_t Dividend,
171 uint32_t Divisor) {
172 return getQuotient(Dividend, Divisor);
173}
174
175/// Convenience helper for 64-bit quotient.
176inline std::pair<uint64_t, int16_t> getQuotient64(uint64_t Dividend,
177 uint64_t Divisor) {
178 return getQuotient(Dividend, Divisor);
179}
180
181/// Implementation of getLg() and friends.
182///
183/// Returns the rounded lg of \c Digits*2^Scale and an int specifying whether
184/// this was rounded up (1), down (-1), or exact (0).
185///
186/// Returns \c INT32_MIN when \c Digits is zero.
187template <class DigitsT>
188inline std::pair<int32_t, int> getLgImpl(DigitsT Digits, int16_t Scale) {
189 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
190
191 if (!Digits)
192 return std::make_pair(INT32_MIN, 0);
193
194 // Get the floor of the lg of Digits.
195 int32_t LocalFloor = sizeof(Digits) * 8 - countLeadingZeros(Digits) - 1;
196
197 // Get the actual floor.
198 int32_t Floor = Scale + LocalFloor;
199 if (Digits == UINT64_C(1) << LocalFloor)
200 return std::make_pair(Floor, 0);
201
202 // Round based on the next digit.
203 assert(LocalFloor >= 1);
204 bool Round = Digits & UINT64_C(1) << (LocalFloor - 1);
205 return std::make_pair(Floor + Round, Round ? 1 : -1);
206}
207
208/// Get the lg (rounded) of a scaled number.
209///
210/// Get the lg of \c Digits*2^Scale.
211///
212/// Returns \c INT32_MIN when \c Digits is zero.
213template <class DigitsT> int32_t getLg(DigitsT Digits, int16_t Scale) {
214 return getLgImpl(Digits, Scale).first;
215}
216
217/// Get the lg floor of a scaled number.
218///
219/// Get the floor of the lg of \c Digits*2^Scale.
220///
221/// Returns \c INT32_MIN when \c Digits is zero.
222template <class DigitsT> int32_t getLgFloor(DigitsT Digits, int16_t Scale) {
223 auto Lg = getLgImpl(Digits, Scale);
224 return Lg.first - (Lg.second > 0);
225}
226
227/// Get the lg ceiling of a scaled number.
228///
229/// Get the ceiling of the lg of \c Digits*2^Scale.
230///
231/// Returns \c INT32_MIN when \c Digits is zero.
232template <class DigitsT> int32_t getLgCeiling(DigitsT Digits, int16_t Scale) {
233 auto Lg = getLgImpl(Digits, Scale);
234 return Lg.first + (Lg.second < 0);
235}
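A worked example tying the three variants together (assuming the header above): for Digits=3, Scale=0, lg 3 is roughly 1.585, so getLg rounds to 2 while getLgFloor and getLgCeiling bracket it as 1 and 2.

    #include <cassert>
    #include <cstdint>
    void lgExamples() {
      assert(llvm::ScaledNumbers::getLg<uint32_t>(3, 0) == 2);
      assert(llvm::ScaledNumbers::getLgFloor<uint32_t>(3, 0) == 1);
      assert(llvm::ScaledNumbers::getLgCeiling<uint32_t>(3, 0) == 2);
    }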
236
237/// Implementation for comparing scaled numbers.
238///
239/// Compare two 64-bit numbers with different scales. Given that the scale of
240/// \c L is higher than that of \c R by \c ScaleDiff, compare them. Return -1,
241/// 1, and 0 for less than, greater than, and equal, respectively.
242///
243/// \pre 0 <= ScaleDiff < 64.
244int compareImpl(uint64_t L, uint64_t R, int ScaleDiff);
245
246/// Compare two scaled numbers.
247///
248/// Compare two scaled numbers. Returns 0 for equal, -1 for less than, and 1
249/// for greater than.
250template <class DigitsT>
251int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) {
252 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
253
254 // Check for zero.
255 if (!LDigits)
256 return RDigits ? -1 : 0;
257 if (!RDigits)
258 return 1;
259
260 // Check for the scale. Use getLgFloor to be sure that the scale difference
261 // is always lower than 64.
262 int32_t lgL = getLgFloor(LDigits, LScale), lgR = getLgFloor(RDigits, RScale);
263 if (lgL != lgR)
264 return lgL < lgR ? -1 : 1;
265
266 // Compare digits.
267 if (LScale < RScale)
268 return compareImpl(LDigits, RDigits, RScale - LScale);
269
270 return -compareImpl(RDigits, LDigits, LScale - RScale);
271}
272
273/// Match scales of two numbers.
274///
275/// Given two scaled numbers, match up their scales. Change the digits and
276/// scales in place. Shift the digits as necessary to form equivalent numbers,
277/// losing precision only when necessary.
278///
279/// If the output value of \c LDigits (\c RDigits) is \c 0, the output value of
280/// \c LScale (\c RScale) is unspecified.
281///
282/// As a convenience, returns the matching scale. If the output value of one
283/// number is zero, returns the scale of the other. If both are zero, which
284/// scale is returned is unspecified.
285template <class DigitsT>
286int16_t matchScales(DigitsT &LDigits, int16_t &LScale, DigitsT &RDigits,
287 int16_t &RScale) {
288 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
289
290 if (LScale < RScale)
291 // Swap arguments.
292 return matchScales(RDigits, RScale, LDigits, LScale);
293 if (!LDigits)
294 return RScale;
295 if (!RDigits || LScale == RScale)
296 return LScale;
297
298 // Now LScale > RScale. Get the difference.
299 int32_t ScaleDiff = int32_t(LScale) - RScale;
300 if (ScaleDiff >= 2 * getWidth<DigitsT>()) {
301 // Don't bother shifting. RDigits will get zero-ed out anyway.
302 RDigits = 0;
303 return LScale;
304 }
305
306 // Shift LDigits left as much as possible, then shift RDigits right.
307 int32_t ShiftL = std::min<int32_t>(countLeadingZeros(LDigits), ScaleDiff);
308 assert(ShiftL < getWidth<DigitsT>() && "can't shift more than width");
309
310 int32_t ShiftR = ScaleDiff - ShiftL;
311 if (ShiftR >= getWidth<DigitsT>()) {
312 // Don't bother shifting. RDigits will get zero-ed out anyway.
313 RDigits = 0;
314 return LScale;
315 }
316
317 LDigits <<= ShiftL;
318 RDigits >>= ShiftR;
319
320 LScale -= ShiftL;
321 RScale += ShiftR;
322 assert(LScale == RScale && "scales should match");
323 return LScale;
324}
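A small usage example (assuming the header above): 1*2^3 and 4*2^1 both denote 8; matchScales shifts the first operand's digits left so both end up at scale 1 without losing precision.

    #include <cassert>
    #include <cstdint>
    void matchScalesExample() {
      uint32_t L = 1, R = 4;    // 1*2^3 == 8 and 4*2^1 == 8
      int16_t LS = 3, RS = 1;
      int16_t S = llvm::ScaledNumbers::matchScales(L, LS, R, RS);
      assert(S == 1 && LS == 1 && RS == 1);
      assert(L == 4 && R == 4); // both now 4*2^1, still equal to 8
    }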
325
326/// Get the sum of two scaled numbers.
327///
328/// Get the sum of two scaled numbers with as much precision as possible.
329///
330/// \pre Adding 1 to \c LScale (or \c RScale) will not overflow INT16_MAX.
331template <class DigitsT>
332std::pair<DigitsT, int16_t> getSum(DigitsT LDigits, int16_t LScale,
333 DigitsT RDigits, int16_t RScale) {
334 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
335
336 // Check inputs up front. This is only relevant if addition overflows, but
337 // testing here should catch more bugs.
338 assert(LScale < INT16_MAX && "scale too large");
339 assert(RScale < INT16_MAX && "scale too large");
340
341 // Normalize digits to match scales.
342 int16_t Scale = matchScales(LDigits, LScale, RDigits, RScale);
343
344 // Compute sum.
345 DigitsT Sum = LDigits + RDigits;
346 if (Sum >= RDigits)
347 return std::make_pair(Sum, Scale);
348
349 // Adjust sum after arithmetic overflow.
350 DigitsT HighBit = DigitsT(1) << (getWidth<DigitsT>() - 1);
351 return std::make_pair(HighBit | Sum >> 1, Scale + 1);
352}
353
354/// Convenience helper for 32-bit sum.
355inline std::pair<uint32_t, int16_t> getSum32(uint32_t LDigits, int16_t LScale,
356 uint32_t RDigits, int16_t RScale) {
357 return getSum(LDigits, LScale, RDigits, RScale);
358}
359
360/// Convenience helper for 64-bit sum.
361inline std::pair<uint64_t, int16_t> getSum64(uint64_t LDigits, int16_t LScale,
362 uint64_t RDigits, int16_t RScale) {
363 return getSum(LDigits, LScale, RDigits, RScale);
364}
365
366/// Get the difference of two scaled numbers.
367///
368/// Get LHS minus RHS with as much precision as possible.
369///
370/// Returns \c (0, 0) if the RHS is larger than the LHS.
371template <class DigitsT>
372std::pair<DigitsT, int16_t> getDifference(DigitsT LDigits, int16_t LScale,
373 DigitsT RDigits, int16_t RScale) {
374 static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
375
376 // Normalize digits to match scales.
377 const DigitsT SavedRDigits = RDigits;
378 const int16_t SavedRScale = RScale;
379 matchScales(LDigits, LScale, RDigits, RScale);
380
381 // Compute difference.
382 if (LDigits <= RDigits)
383 return std::make_pair(0, 0);
384 if (RDigits || !SavedRDigits)
385 return std::make_pair(LDigits - RDigits, LScale);
386
387 // Check if RDigits just barely lost its last bit. E.g., for 32-bit:
388 //
389 // 1*2^32 - 1*2^0 == 0xffffffff != 1*2^32
390 const auto RLgFloor = getLgFloor(SavedRDigits, SavedRScale);
391 if (!compare(LDigits, LScale, DigitsT(1), RLgFloor + getWidth<DigitsT>()))
392 return std::make_pair(std::numeric_limits<DigitsT>::max(), RLgFloor);
393
394 return std::make_pair(LDigits, LScale);
395}
396
397/// Convenience helper for 32-bit difference.
398inline std::pair<uint32_t, int16_t> getDifference32(uint32_t LDigits,
399 int16_t LScale,
400 uint32_t RDigits,
401 int16_t RScale) {
402 return getDifference(LDigits, LScale, RDigits, RScale);
403}
404
405/// Convenience helper for 64-bit difference.
406inline std::pair<uint64_t, int16_t> getDifference64(uint64_t LDigits,
407 int16_t LScale,
408 uint64_t RDigits,
409 int16_t RScale) {
410 return getDifference(LDigits, LScale, RDigits, RScale);
411}
412
413} // end namespace ScaledNumbers
414} // end namespace llvm
415
416namespace llvm {
417
418class raw_ostream;
419class ScaledNumberBase {
420public:
421 static constexpr int DefaultPrecision = 10;
422
423 static void dump(uint64_t D, int16_t E, int Width);
424 static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width,
425 unsigned Precision);
426 static std::string toString(uint64_t D, int16_t E, int Width,
427 unsigned Precision);
428 static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); }
429 static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); }
430 static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); }
431
432 static std::pair<uint64_t, bool> splitSigned(int64_t N) {
433 if (N >= 0)
434 return std::make_pair(N, false);
435 uint64_t Unsigned = N == INT64_MIN ? UINT64_C(1) << 63 : uint64_t(-N);
436 return std::make_pair(Unsigned, true);
437 }
438 static int64_t joinSigned(uint64_t U, bool IsNeg) {
439 if (U > uint64_t(INT64_MAX))
440 return IsNeg ? INT64_MIN : INT64_MAX;
441 return IsNeg ? -int64_t(U) : int64_t(U);
442 }
443};
444
445/// Simple representation of a scaled number.
446///
447/// ScaledNumber is a number represented by digits and a scale. It uses simple
448/// saturation arithmetic and every operation is well-defined for every value.
449/// It's somewhat similar in behaviour to a soft-float, but is *not* a
450/// replacement for one. If you're doing numerics, look at \a APFloat instead.
451/// Nevertheless, we've found these semantics useful for modelling certain cost
452/// metrics.
453///
454/// The number is split into a signed scale and unsigned digits. The number
455/// represented is \c getDigits()*2^getScale(). In this way, the digits are
456/// much like the mantissa in the x87 long double, but there is no canonical
457/// form so the same number can be represented by many bit representations.
458///
459/// ScaledNumber is templated on the underlying integer type for digits, which
460/// is expected to be unsigned.
461///
462/// Unlike APFloat, ScaledNumber does not model architecture floating point
463/// behaviour -- while this might make it a little faster and easier to reason
464/// about, it certainly makes it more dangerous for general numerics.
465///
466/// ScaledNumber is totally ordered. However, there is no canonical form, so
467/// there are multiple representations of most scalars. E.g.:
468///
469/// ScaledNumber(8u, 0) == ScaledNumber(4u, 1)
470/// ScaledNumber(4u, 1) == ScaledNumber(2u, 2)
471/// ScaledNumber(2u, 2) == ScaledNumber(1u, 3)
472///
473/// ScaledNumber implements most arithmetic operations. Precision is kept
474/// where possible. It uses simple saturation arithmetic, so operations
475/// saturate to 0.0 or getLargest() rather than underflowing or overflowing.
476/// It has some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0.
477/// Any other division by 0.0 is defined to be getLargest().
478///
479/// As a convenience for modifying the exponent, left and right shifting are
480/// both implemented, and both interpret negative shifts as positive shifts in
481/// the opposite direction.
482///
483/// Scales are limited to the range accepted by x87 long double. This makes
484/// it trivial to add functionality to convert to APFloat (this is already
485/// relied on for the implementation of printing).
486///
487/// Possible (and conflicting) future directions:
488///
489/// 1. Turn this into a wrapper around \a APFloat.
490/// 2. Share the algorithm implementations with \a APFloat.
491/// 3. Allow \a ScaledNumber to represent a signed number.
492template <class DigitsT> class ScaledNumber : ScaledNumberBase {
493public:
494 static_assert(!std::numeric_limits<DigitsT>::is_signed,
495 "only unsigned floats supported");
496
497 typedef DigitsT DigitsType;
498
499private:
500 typedef std::numeric_limits<DigitsType> DigitsLimits;
501
502 static constexpr int Width = sizeof(DigitsType) * 8;
503 static_assert(Width <= 64, "invalid integer width for digits");
504
505private:
506 DigitsType Digits = 0;
507 int16_t Scale = 0;
508
509public:
510 ScaledNumber() = default;
511
512 constexpr ScaledNumber(DigitsType Digits, int16_t Scale)
513 : Digits(Digits), Scale(Scale) {}
514
515private:
516 ScaledNumber(const std::pair<DigitsT, int16_t> &X)
517 : Digits(X.first), Scale(X.second) {}
518
519public:
520 static ScaledNumber getZero() { return ScaledNumber(0, 0); }
521 static ScaledNumber getOne() { return ScaledNumber(1, 0); }
522 static ScaledNumber getLargest() {
523 return ScaledNumber(DigitsLimits::max(), ScaledNumbers::MaxScale);
524 }
525 static ScaledNumber get(uint64_t N) { return adjustToWidth(N, 0); }
526 static ScaledNumber getInverse(uint64_t N) {
527 return get(N).invert();
528 }
529 static ScaledNumber getFraction(DigitsType N, DigitsType D) {
530 return getQuotient(N, D);
531 }
532
533 int16_t getScale() const { return Scale; }
534 DigitsType getDigits() const { return Digits; }
535
536 /// Convert to the given integer type.
537 ///
538 /// Convert to \c IntT using simple saturating arithmetic, truncating if
539 /// necessary.
540 template <class IntT> IntT toInt() const;
541
542 bool isZero() const { return !Digits; }
543 bool isLargest() const { return *this == getLargest(); }
544 bool isOne() const {
545 if (Scale > 0 || Scale <= -Width)
546 return false;
547 return Digits == DigitsType(1) << -Scale;
548 }
549
550 /// The log base 2, rounded.
551 ///
552 /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN.
553 int32_t lg() const { return ScaledNumbers::getLg(Digits, Scale); }
554
555 /// The log base 2, rounded towards INT32_MIN.
556 ///
557 /// Get the lg floor. lg 0 is defined to be INT32_MIN.
558 int32_t lgFloor() const { return ScaledNumbers::getLgFloor(Digits, Scale); }
559
560 /// The log base 2, rounded towards INT32_MAX.
561 ///
562 /// Get the lg ceiling. lg 0 is defined to be INT32_MIN.
563 int32_t lgCeiling() const {
564 return ScaledNumbers::getLgCeiling(Digits, Scale);
565 }
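  // For illustration (values hypothetical): for the scalar 5*2^0, lg()
  // rounds to 2, lgFloor() gives 2, and lgCeiling() gives 3; all three
  // return INT32_MIN for zero.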
566
567 bool operator==(const ScaledNumber &X) const { return compare(X) == 0; }
568 bool operator<(const ScaledNumber &X) const { return compare(X) < 0; }
569 bool operator!=(const ScaledNumber &X) const { return compare(X) != 0; }
570 bool operator>(const ScaledNumber &X) const { return compare(X) > 0; }
571 bool operator<=(const ScaledNumber &X) const { return compare(X) <= 0; }
572 bool operator>=(const ScaledNumber &X) const { return compare(X) >= 0; }
573
574 bool operator!() const { return isZero(); }
575
576 /// Convert to a decimal representation in a string.
577 ///
578/// Convert to a string. Scientific notation is used for very large or
579/// very small numbers -- roughly, those outside of the
580/// range 2^-64 through 2^64.
581 ///
582 /// \c Precision indicates the number of decimal digits of precision to use;
583 /// 0 requests the maximum available.
584 ///
585 /// As a special case to make debugging easier, if the number is small enough
586 /// to convert without scientific notation and has more than \c Precision
587 /// digits before the decimal place, it's printed accurately to the first
588 /// digit past zero. E.g., assuming 10 digits of precision:
589 ///
590 /// 98765432198.7654... => 98765432198.8
591 /// 8765432198.7654... => 8765432198.8
592 /// 765432198.7654... => 765432198.8
593 /// 65432198.7654... => 65432198.77
594 /// 5432198.7654... => 5432198.765
595 std::string toString(unsigned Precision = DefaultPrecision) {
596 return ScaledNumberBase::toString(Digits, Scale, Width, Precision);
597 }
598
599 /// Print a decimal representation.
600 ///
601 /// Print a string. See toString for documentation.
602 raw_ostream &print(raw_ostream &OS,
603 unsigned Precision = DefaultPrecision) const {
604 return ScaledNumberBase::print(OS, Digits, Scale, Width, Precision);
605 }
606 void dump() const { return ScaledNumberBase::dump(Digits, Scale, Width); }
607
608 ScaledNumber &operator+=(const ScaledNumber &X) {
609 std::tie(Digits, Scale) =
610 ScaledNumbers::getSum(Digits, Scale, X.Digits, X.Scale);
611 // Check for exponent past MaxScale.
612 if (Scale > ScaledNumbers::MaxScale)
613 *this = getLargest();
614 return *this;
615 }
616 ScaledNumber &operator-=(const ScaledNumber &X) {
617 std::tie(Digits, Scale) =
618 ScaledNumbers::getDifference(Digits, Scale, X.Digits, X.Scale);
619 return *this;
620 }
621 ScaledNumber &operator*=(const ScaledNumber &X);
622 ScaledNumber &operator/=(const ScaledNumber &X);
623 ScaledNumber &operator<<=(int16_t Shift) {
624 shiftLeft(Shift);
625 return *this;
626 }
627 ScaledNumber &operator>>=(int16_t Shift) {
628 shiftRight(Shift);
629 return *this;
630 }
631
632private:
633 void shiftLeft(int32_t Shift);
634 void shiftRight(int32_t Shift);
635
636 /// Adjust two floats to have matching exponents.
637 ///
638 /// Adjust \c this and \c X to have matching exponents. Returns the new \c X
639 /// by value. Does nothing if \a isZero() for either.
640 ///
641 /// The value that compares smaller will lose precision, and possibly become
642 /// \a isZero().
643 ScaledNumber matchScales(ScaledNumber X) {
644 ScaledNumbers::matchScales(Digits, Scale, X.Digits, X.Scale);
645 return X;
646 }
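  // For illustration (values hypothetical): matching 1*2^70 against 1*2^0
  // with 32-bit digits must shift the smaller value's digits out entirely,
  // so the right-hand side becomes zero while the left is preserved.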
647
648public:
649 /// Scale a large number accurately.
650 ///
651 /// Scale N (multiply it by this). Uses full precision multiplication, even
652 /// if Width is smaller than 64, so information is not lost.
653 uint64_t scale(uint64_t N) const;
654 uint64_t scaleByInverse(uint64_t N) const {
655 // TODO: implement directly, rather than relying on inverse. Inverse is
656 // expensive.
657 return inverse().scale(N);
658 }
659 int64_t scale(int64_t N) const {
660 std::pair<uint64_t, bool> Unsigned = splitSigned(N);
661 return joinSigned(scale(Unsigned.first), Unsigned.second);
662 }
663 int64_t scaleByInverse(int64_t N) const {
664 std::pair<uint64_t, bool> Unsigned = splitSigned(N);
665 return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second);
666 }
667
668 int compare(const ScaledNumber &X) const {
669 return ScaledNumbers::compare(Digits, Scale, X.Digits, X.Scale);
670 }
671 int compareTo(uint64_t N) const {
672 return ScaledNumbers::compare<uint64_t>(Digits, Scale, N, 0);
673 }
674 int compareTo(int64_t N) const { return N < 0 ? 1 : compareTo(uint64_t(N)); }
675
676 ScaledNumber &invert() { return *this = ScaledNumber::get(1) / *this; }
677 ScaledNumber inverse() const { return ScaledNumber(*this).invert(); }
678
679private:
680 static ScaledNumber getProduct(DigitsType LHS, DigitsType RHS) {
681 return ScaledNumbers::getProduct(LHS, RHS);
682 }
683 static ScaledNumber getQuotient(DigitsType Dividend, DigitsType Divisor) {
684 return ScaledNumbers::getQuotient(Dividend, Divisor);
685 }
686
687 static int countLeadingZerosWidth(DigitsType Digits) {
688 if (Width == 64)
689 return countLeadingZeros64(Digits);
690 if (Width == 32)
691 return countLeadingZeros32(Digits);
692 return countLeadingZeros32(Digits) + Width - 32;
693 }
694
695 /// Adjust a number to width, rounding up if necessary.
696 ///
697 /// Should only be called for \c Shift close to zero.
698 ///
699 /// \pre Shift >= MinScale && Shift + 64 <= MaxScale.
700 static ScaledNumber adjustToWidth(uint64_t N, int32_t Shift) {
701 assert(Shift >= ScaledNumbers::MinScale && "Shift should be close to 0");
702 assert(Shift <= ScaledNumbers::MaxScale - 64 &&
703 "Shift should be close to 0"); // in this build, assert expands to ((void)0)
704 auto Adjusted = ScaledNumbers::getAdjusted<DigitsT>(N, Shift);
705 return Adjusted;
706 }
707
708 static ScaledNumber getRounded(ScaledNumber P, bool Round) {
709 // Saturate.
710 if (P.isLargest())
711 return P;
712
713 return ScaledNumbers::getRounded(P.Digits, P.Scale, Round);
714 }
715};
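
A usage sketch of the semantics documented above -- multiple equal
representations, saturation, and defined division by zero (not part of this
header; assumes a standalone test program):

    #include "llvm/Support/ScaledNumber.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      using SN = llvm::ScaledNumber<uint32_t>;
      // No canonical form: these all represent the value 8.
      assert(SN(8, 0) == SN(4, 1));
      assert(SN(4, 1) == SN(2, 2) && SN(2, 2) == SN(1, 3));
      // Saturation instead of overflow or underflow.
      assert(SN::getLargest() + SN::getOne() == SN::getLargest());
      assert(SN::getZero() - SN::getOne() == SN::getZero());
      // Division by zero is defined: 0.0/0.0 == 0.0, otherwise getLargest().
      assert(SN::getZero() / SN::getZero() == SN::getZero());
      assert(SN::getOne() / SN::getZero() == SN::getLargest());
      return 0;
    }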
716
717#define SCALED_NUMBER_BOP(op, base) \
718 template <class DigitsT> \
719 ScaledNumber<DigitsT> operator op(const ScaledNumber<DigitsT> &L, \
720 const ScaledNumber<DigitsT> &R) { \
721 return ScaledNumber<DigitsT>(L) base R; \
722 }
723SCALED_NUMBER_BOP(+, += )
724SCALED_NUMBER_BOP(-, -= )
725SCALED_NUMBER_BOP(*, *= )
Step 9: The value 16383 is assigned to 'Scaled.Scale'
726SCALED_NUMBER_BOP(/, /= )
727#undef SCALED_NUMBER_BOP
728
729template <class DigitsT>
730ScaledNumber<DigitsT> operator<<(const ScaledNumber<DigitsT> &L,
731 int16_t Shift) {
732 return ScaledNumber<DigitsT>(L) <<= Shift;
733}
734
735template <class DigitsT>
736ScaledNumber<DigitsT> operator>>(const ScaledNumber<DigitsT> &L,
737 int16_t Shift) {
738 return ScaledNumber<DigitsT>(L) >>= Shift;
739}
740
741template <class DigitsT>
742raw_ostream &operator<<(raw_ostream &OS, const ScaledNumber<DigitsT> &X) {
743 return X.print(OS, 10);
744}
745
746#define SCALED_NUMBER_COMPARE_TO_TYPE(op, T1, T2) \
747 template <class DigitsT> \
748 bool operator op(const ScaledNumber<DigitsT> &L, T1 R) { \
749 return L.compareTo(T2(R)) op 0; \
750 } \
751 template <class DigitsT> \
752 bool operator op(T1 L, const ScaledNumber<DigitsT> &R) { \
753 return 0 op R.compareTo(T2(L)); \
754 }
755#define SCALED_NUMBER_COMPARE_TO(op) \
756 SCALED_NUMBER_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \
757 SCALED_NUMBER_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \
758 SCALED_NUMBER_COMPARE_TO_TYPE(op, int64_t, int64_t) \
759 SCALED_NUMBER_COMPARE_TO_TYPE(op, int32_t, int64_t)
760SCALED_NUMBER_COMPARE_TO(< )
Step 13: Assuming the condition is false
Step 14: Returning zero, which participates in a condition later
761SCALED_NUMBER_COMPARE_TO(> )
762SCALED_NUMBER_COMPARE_TO(== )
763SCALED_NUMBER_COMPARE_TO(!= )
764SCALED_NUMBER_COMPARE_TO(<= )
765SCALED_NUMBER_COMPARE_TO(>= )
Step 18: Assuming the condition is false
Step 19: Returning zero, which participates in a condition later
766#undef SCALED_NUMBER_COMPARE_TO
767#undef SCALED_NUMBER_COMPARE_TO_TYPE
768
769template <class DigitsT>
770uint64_t ScaledNumber<DigitsT>::scale(uint64_t N) const {
771 if (Width == 64 || N <= DigitsLimits::max())
772 return (get(N) * *this).template toInt<uint64_t>();
773
774 // Defer to the 64-bit version.
775 return ScaledNumber<uint64_t>(Digits, Scale).scale(N);
776}
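
For example, a number representing one half scales 100 down to 50; when N does
not fit in the 32-bit digit type, the computation is rerun with 64-bit digits
as above. A minimal sketch (assumes a standalone test program):

    #include "llvm/Support/ScaledNumber.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      // 1 * 2^-1 == 0.5.
      llvm::ScaledNumber<uint32_t> Half(1, -1);
      assert(Half.scale(UINT64_C(100)) == 50);
      // N wider than 32 bits takes the 64-bit fallback path.
      assert(Half.scale(UINT64_C(1) << 40) == (UINT64_C(1) << 39));
      return 0;
    }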
777
778template <class DigitsT>
779template <class IntT>
780IntT ScaledNumber<DigitsT>::toInt() const {
781 typedef std::numeric_limits<IntT> Limits;
782 if (*this < 1)
Step 12: Calling 'operator< <unsigned long long>' (the less-than comparison template, not a left shift)
Step 15: Returning from 'operator< <unsigned long long>'
Step 16: Taking false branch
783 return 0;
784 if (*this >= Limits::max())
Step 17: Calling 'operator>= <unsigned long long>'
Step 20: Returning from 'operator>= <unsigned long long>'
Step 21: Taking false branch
785 return Limits::max();
786
787 IntT N = Digits;
788 if (Scale > 0) {
Step 21.1: Field 'Scale' is > 0
Step 22: Taking true branch
789 assert(size_t(Scale) < sizeof(IntT) * 8); // ((void)0) in this build
790 return N << Scale;
Step 23: The result of the left shift is undefined due to shifting by '16383', which is greater or equal to the width of type 'unsigned long long'
791 }
792 if (Scale < 0) {
793 assert(size_t(-Scale) < sizeof(IntT) * 8); // ((void)0) in this build
794 return N >> -Scale;
795 }
796 return N;
797}
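
On the reported path, Scale was set to 16383 (ScaledNumbers::MaxScale, step 9)
by the division inside invert(), the range checks at lines 782 and 784 were
both assumed to fail (steps 12-21), and the assert at line 789 compiles to
((void)0) in this build, so execution reaches 'N << Scale' with a shift amount
of 16383 -- undefined behaviour for any IntT of 64 bits or fewer. In practice
a nonzero value with Scale == MaxScale should already satisfy
'*this >= Limits::max()', so the path suggests the analyzer did not correlate
the two comparisons with Scale and may not be realizable at run time. A
variant that guards the shift amount directly would make the invariant
explicit; a hypothetical sketch (toIntGuarded is not part of LLVM):

    #include <cstdint>
    #include <limits>

    // Hypothetical free-standing variant of toInt() that saturates instead
    // of ever shifting by >= the width of IntT.
    template <class IntT, class DigitsT>
    IntT toIntGuarded(DigitsT Digits, int16_t Scale) {
      typedef std::numeric_limits<IntT> Limits;
      const int IntWidth = int(sizeof(IntT)) * 8;

      if (Digits == 0)
        return 0;
      if (Scale > 0) {
        // Guard the shift explicitly: saturate when it would be undefined
        // or when the shifted value would exceed IntT's range.
        if (Scale >= IntWidth ||
            uint64_t(Digits) > (uint64_t(Limits::max()) >> Scale))
          return Limits::max();
        return IntT(Digits) << Scale;
      }
      if (Scale < 0) {
        if (-int32_t(Scale) >= 64)
          return 0;
        uint64_t V = uint64_t(Digits) >> -Scale;
        return V > uint64_t(Limits::max()) ? Limits::max() : IntT(V);
      }
      return uint64_t(Digits) > uint64_t(Limits::max()) ? Limits::max()
                                                        : IntT(Digits);
    }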
798
799template <class DigitsT>
800ScaledNumber<DigitsT> &ScaledNumber<DigitsT>::
801operator*=(const ScaledNumber &X) {
802 if (isZero())
803 return *this;
804 if (X.isZero())
805 return *this = X;
806
807 // Save the exponents.
808 int32_t Scales = int32_t(Scale) + int32_t(X.Scale);
809
810 // Get the raw product.
811 *this = getProduct(Digits, X.Digits);
812
813 // Combine with exponents.
814 return *this <<= Scales;
815}
816template <class DigitsT>
817ScaledNumber<DigitsT> &ScaledNumber<DigitsT>::
818operator/=(const ScaledNumber &X) {
819 if (isZero())
820 return *this;
821 if (X.isZero())
822 return *this = getLargest();
823
824 // Save the exponents.
825 int32_t Scales = int32_t(Scale) - int32_t(X.Scale);
826
827 // Get the raw quotient.
828 *this = getQuotient(Digits, X.Digits);
829
830 // Combine with exponents.
831 return *this <<= Scales;
832}
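
Both operators keep the digit arithmetic in getProduct/getQuotient and fold
the saved scales back in with a signed shift, saturating as usual. A minimal
sketch (assumes a standalone test program):

    #include "llvm/Support/ScaledNumber.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      using SN = llvm::ScaledNumber<uint32_t>;
      // (3 * 2^1) * (4 * 2^0) == 24.
      assert((SN(3, 1) * SN(4, 0)).toInt<uint64_t>() == 24);
      // 1 / 4 == 2^-2; precision lives in the digits, the scale adjusts.
      assert(SN(1, 0) / SN(4, 0) == SN(1, -2));
      return 0;
    }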
833template <class DigitsT> void ScaledNumber<DigitsT>::shiftLeft(int32_t Shift) {
834 if (!Shift || isZero())
835 return;
836 assert(Shift != INT32_MIN); // ((void)0) in this build
837 if (Shift < 0) {
838 shiftRight(-Shift);
839 return;
840 }
841
842 // Shift as much as we can in the exponent.
843 int32_t ScaleShift = std::min(Shift, ScaledNumbers::MaxScale - Scale);
844 Scale += ScaleShift;
845 if (ScaleShift == Shift)
846 return;
847
848 // Check this late, since it's rare.
849 if (isLargest())
850 return;
851
852 // Shift the digits themselves.
853 Shift -= ScaleShift;
854 if (Shift > countLeadingZerosWidth(Digits)) {
855 // Saturate.
856 *this = getLargest();
857 return;
858 }
859
860 Digits <<= Shift;
861}
862
863template <class DigitsT> void ScaledNumber<DigitsT>::shiftRight(int32_t Shift) {
864 if (!Shift || isZero())
865 return;
866 assert(Shift != INT32_MIN); // ((void)0) in this build
867 if (Shift < 0) {
868 shiftLeft(-Shift);
869 return;
870 }
871
872 // Shift as much as we can in the exponent.
873 int32_t ScaleShift = std::min(Shift, Scale - ScaledNumbers::MinScale);
874 Scale -= ScaleShift;
875 if (ScaleShift == Shift)
876 return;
877
878 // Shift the digits themselves.
879 Shift -= ScaleShift;
880 if (Shift >= Width) {
881 // Saturate.
882 *this = getZero();
883 return;
884 }
885
886 Digits >>= Shift;
887}
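
Shifts are absorbed into the scale first and only touch the digits once the
scale saturates at MaxScale or MinScale. A minimal sketch (assumes a
standalone test program):

    #include "llvm/Support/ScaledNumber.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      using SN = llvm::ScaledNumber<uint32_t>;
      // Ordinary shifts only move the exponent, so nothing is lost.
      assert((SN(1, 0) << 100) == SN(1, 100));
      assert((SN(1, 0) >> 100) == SN(1, -100));
      // Negative shifts go the other way.
      assert((SN(1, 0) << -3) == SN(1, -3));
      // Once largest, further left shifts keep the value saturated.
      assert((SN::getLargest() << 1) == SN::getLargest());
      return 0;
    }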
888
889
890} // end namespace llvm
891
892#endif // LLVM_SUPPORT_SCALEDNUMBER_H