File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Warning: line 212, column 36: Division by zero
1 | //===- InstCombineVectorOps.cpp -------------------------------------------===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This file implements instcombine for ExtractElement, InsertElement and | ||||||||
10 | // ShuffleVector. | ||||||||
11 | // | ||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||
13 | |||||||||
14 | #include "InstCombineInternal.h" | ||||||||
15 | #include "llvm/ADT/APInt.h" | ||||||||
16 | #include "llvm/ADT/ArrayRef.h" | ||||||||
17 | #include "llvm/ADT/DenseMap.h" | ||||||||
18 | #include "llvm/ADT/STLExtras.h" | ||||||||
19 | #include "llvm/ADT/SmallBitVector.h" | ||||||||
20 | #include "llvm/ADT/SmallVector.h" | ||||||||
21 | #include "llvm/ADT/Statistic.h" | ||||||||
22 | #include "llvm/Analysis/InstructionSimplify.h" | ||||||||
23 | #include "llvm/Analysis/VectorUtils.h" | ||||||||
24 | #include "llvm/IR/BasicBlock.h" | ||||||||
25 | #include "llvm/IR/Constant.h" | ||||||||
26 | #include "llvm/IR/Constants.h" | ||||||||
27 | #include "llvm/IR/DerivedTypes.h" | ||||||||
28 | #include "llvm/IR/InstrTypes.h" | ||||||||
29 | #include "llvm/IR/Instruction.h" | ||||||||
30 | #include "llvm/IR/Instructions.h" | ||||||||
31 | #include "llvm/IR/Operator.h" | ||||||||
32 | #include "llvm/IR/PatternMatch.h" | ||||||||
33 | #include "llvm/IR/Type.h" | ||||||||
34 | #include "llvm/IR/User.h" | ||||||||
35 | #include "llvm/IR/Value.h" | ||||||||
36 | #include "llvm/Support/Casting.h" | ||||||||
37 | #include "llvm/Support/ErrorHandling.h" | ||||||||
38 | #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" | ||||||||
39 | #include "llvm/Transforms/InstCombine/InstCombiner.h" | ||||||||
40 | #include <cassert> | ||||||||
41 | #include <cstdint> | ||||||||
42 | #include <iterator> | ||||||||
43 | #include <utility> | ||||||||
44 | |||||||||
45 | using namespace llvm; | ||||||||
46 | using namespace PatternMatch; | ||||||||
47 | |||||||||
48 | #define DEBUG_TYPE "instcombine" | ||||||||
49 | |||||||||
50 | STATISTIC(NumAggregateReconstructionsSimplified, | ||||||||
51 |           "Number of aggregate reconstructions turned into reuse of the " | ||||||||
52 |           "original aggregate"); | ||||||||
53 | |||||||||
54 | /// Return true if the value is cheaper to scalarize than it is to leave as a | ||||||||
55 | /// vector operation. IsConstantExtractIndex indicates whether we are extracting | ||||||||
56 | /// one known element from a vector constant. | ||||||||
57 | /// | ||||||||
58 | /// FIXME: It's possible to create more instructions than previously existed. | ||||||||
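/// For example (illustrative IR, not from this file):
///   %v = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
///   %e = extractelement <4 x i32> %v, i64 0
/// Here the add has a constant operand whose lane 0 can be extracted for
/// free, so turning the vector add into a single scalar add is considered
/// cheap.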
59 | static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { | ||||||||
60 | // If we can pick a scalar constant value out of a vector, that is free. | ||||||||
61 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||
62 | return IsConstantExtractIndex || C->getSplatValue(); | ||||||||
63 | |||||||||
64 | // An insertelement to the same constant index as our extract will simplify | ||||||||
65 | // to the scalar inserted element. An insertelement to a different constant | ||||||||
66 | // index is irrelevant to our extract. | ||||||||
67 | if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt()))) | ||||||||
68 | return IsConstantExtractIndex; | ||||||||
69 | |||||||||
70 | if (match(V, m_OneUse(m_Load(m_Value())))) | ||||||||
71 | return true; | ||||||||
72 | |||||||||
73 | if (match(V, m_OneUse(m_UnOp()))) | ||||||||
74 | return true; | ||||||||
75 | |||||||||
76 | Value *V0, *V1; | ||||||||
77 | if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1))))) | ||||||||
78 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||||||
79 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||||||
80 | return true; | ||||||||
81 | |||||||||
82 | CmpInst::Predicate UnusedPred; | ||||||||
83 | if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1))))) | ||||||||
84 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||||||
85 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||||||
86 | return true; | ||||||||
87 | |||||||||
88 | return false; | ||||||||
89 | } | ||||||||
90 | |||||||||
91 | // If we have a PHI node with a vector type that is only used to feed | ||||||||
92 | // itself and be an operand of extractelement at a constant location, | ||||||||
93 | // try to replace the PHI of the vector type with a PHI of a scalar type. | ||||||||
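// For example (illustrative IR, assuming a loop-carried recurrence):
//   %vec = phi <4 x float> [ %init, %entry ], [ %next, %loop ]
//   %next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %e = extractelement <4 x float> %vec, i64 0
// When %next is only used by the PHI, the vector PHI can be replaced by a
// scalar PHI of element 0 fed by a scalar fadd, and %e by that scalar PHI.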
94 | Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, | ||||||||
95 | PHINode *PN) { | ||||||||
96 | SmallVector<Instruction *, 2> Extracts; | ||||||||
97 | // The users we want the PHI to have are: | ||||||||
98 | // 1) The EI ExtractElement (we already know this) | ||||||||
99 | // 2) Possibly more ExtractElements with the same index. | ||||||||
100 | // 3) Another operand, which will feed back into the PHI. | ||||||||
101 | Instruction *PHIUser = nullptr; | ||||||||
102 | for (auto U : PN->users()) { | ||||||||
103 | if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) { | ||||||||
104 | if (EI.getIndexOperand() == EU->getIndexOperand()) | ||||||||
105 | Extracts.push_back(EU); | ||||||||
106 | else | ||||||||
107 | return nullptr; | ||||||||
108 | } else if (!PHIUser) { | ||||||||
109 | PHIUser = cast<Instruction>(U); | ||||||||
110 | } else { | ||||||||
111 | return nullptr; | ||||||||
112 | } | ||||||||
113 | } | ||||||||
114 | |||||||||
115 | if (!PHIUser) | ||||||||
116 | return nullptr; | ||||||||
117 | |||||||||
118 | // Verify that this PHI user has one use, which is the PHI itself, | ||||||||
119 | // and that it is a binary operation which is cheap to scalarize; | ||||||||
120 | // otherwise return nullptr. | ||||||||
121 | if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || | ||||||||
122 | !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) | ||||||||
123 | return nullptr; | ||||||||
124 | |||||||||
125 | // Create a scalar PHI node that will replace the vector PHI node | ||||||||
126 | // just before the current PHI node. | ||||||||
127 | PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( | ||||||||
128 | PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); | ||||||||
129 | // Scalarize each PHI operand. | ||||||||
130 | for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { | ||||||||
131 | Value *PHIInVal = PN->getIncomingValue(i); | ||||||||
132 | BasicBlock *inBB = PN->getIncomingBlock(i); | ||||||||
133 | Value *Elt = EI.getIndexOperand(); | ||||||||
134 | // If the operand is the PHI induction variable: | ||||||||
135 | if (PHIInVal == PHIUser) { | ||||||||
136 | // Scalarize the binary operation. Its first operand is the | ||||||||
137 | // scalar PHI, and the second operand is extracted from the other | ||||||||
138 | // vector operand. | ||||||||
139 | BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); | ||||||||
140 | unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; | ||||||||
141 | Value *Op = InsertNewInstWith( | ||||||||
142 | ExtractElementInst::Create(B0->getOperand(opId), Elt, | ||||||||
143 | B0->getOperand(opId)->getName() + ".Elt"), | ||||||||
144 | *B0); | ||||||||
145 | Value *newPHIUser = InsertNewInstWith( | ||||||||
146 | BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), | ||||||||
147 | scalarPHI, Op, B0), *B0); | ||||||||
148 | scalarPHI->addIncoming(newPHIUser, inBB); | ||||||||
149 | } else { | ||||||||
150 | // Scalarize PHI input: | ||||||||
151 | Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); | ||||||||
152 | // Insert the new instruction into the predecessor basic block. | ||||||||
153 | Instruction *pos = dyn_cast<Instruction>(PHIInVal); | ||||||||
154 | BasicBlock::iterator InsertPos; | ||||||||
155 | if (pos && !isa<PHINode>(pos)) { | ||||||||
156 | InsertPos = ++pos->getIterator(); | ||||||||
157 | } else { | ||||||||
158 | InsertPos = inBB->getFirstInsertionPt(); | ||||||||
159 | } | ||||||||
160 | |||||||||
161 | InsertNewInstWith(newEI, *InsertPos); | ||||||||
162 | |||||||||
163 | scalarPHI->addIncoming(newEI, inBB); | ||||||||
164 | } | ||||||||
165 | } | ||||||||
166 | |||||||||
167 | for (auto E : Extracts) | ||||||||
168 | replaceInstUsesWith(*E, scalarPHI); | ||||||||
169 | |||||||||
170 | return &EI; | ||||||||
171 | } | ||||||||
172 | |||||||||
173 | static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, | ||||||||
174 | InstCombiner::BuilderTy &Builder, | ||||||||
175 | bool IsBigEndian) { | ||||||||
176 | Value *X; | ||||||||
177 | uint64_t ExtIndexC; | ||||||||
178 | if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || | ||||||||
179 | !X->getType()->isVectorTy() || | ||||||||
180 | !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) | ||||||||
181 | return nullptr; | ||||||||
182 | |||||||||
183 | // If this extractelement is using a bitcast from a vector of the same number | ||||||||
184 | // of elements, see if we can find the source element from the source vector: | ||||||||
185 | // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] | ||||||||
186 | auto *SrcTy = cast<VectorType>(X->getType()); | ||||||||
187 | Type *DestTy = Ext.getType(); | ||||||||
188 | ElementCount NumSrcElts = SrcTy->getElementCount(); | ||||||||
189 | ElementCount NumElts = | ||||||||
190 | cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); | ||||||||
191 | if (NumSrcElts == NumElts) | ||||||||
192 | if (Value *Elt = findScalarElement(X, ExtIndexC)) | ||||||||
193 | return new BitCastInst(Elt, DestTy); | ||||||||
194 | |||||||||
195 | assert(NumSrcElts.isScalable() == NumElts.isScalable() && | ||||||||
196 |        "Src and Dst must be the same sort of vector type"); | ||||||||
197 | |||||||||
198 | // If the source elements are wider than the destination, try to shift and | ||||||||
199 | // truncate a subset of scalar bits of an insert op. | ||||||||
200 | if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { | ||||||||
201 | Value *Scalar; | ||||||||
202 | uint64_t InsIndexC; | ||||||||
203 | if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar), | ||||||||
204 | m_ConstantInt(InsIndexC)))) | ||||||||
205 | return nullptr; | ||||||||
206 | |||||||||
207 | // The extract must be from the subset of vector elements that we inserted | ||||||||
208 | // into. Example: if we inserted element 1 of a <2 x i64> and we are | ||||||||
209 | // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 | ||||||||
210 | // of elements 4-7 of the bitcasted vector. | ||||||||
211 | unsigned NarrowingRatio = | ||||||||
212 | NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); | ||||||||
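// Worked numbers for the example above (illustrative): a <2 x i64> source
// bitcast to <8 x i16> gives NumSrcElts = 2 and NumElts = 8, so
// NarrowingRatio = 8 / 2 = 4 and extract indices 4-7 map to inserted index 1.
// NumSrcElts is the element count of a vector type, so its known minimum is
// expected to be non-zero; the division-by-zero warning reported above flags
// the degenerate case where it would be zero.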
213 | if (ExtIndexC / NarrowingRatio != InsIndexC) | ||||||||
214 | return nullptr; | ||||||||
215 | |||||||||
216 | // We are extracting part of the original scalar. How that scalar is | ||||||||
217 | // inserted into the vector depends on the endian-ness. Example: | ||||||||
218 | // Vector Byte Elt Index: 0 1 2 3 4 5 6 7 | ||||||||
219 | // +--+--+--+--+--+--+--+--+ | ||||||||
220 | // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3| | ||||||||
221 | // extelt <4 x i16> V', 3: | |S2|S3| | ||||||||
222 | // +--+--+--+--+--+--+--+--+ | ||||||||
223 | // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value. | ||||||||
224 | // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value. | ||||||||
225 | // In this example, we must right-shift little-endian. Big-endian is just a | ||||||||
226 | // truncate. | ||||||||
227 | unsigned Chunk = ExtIndexC % NarrowingRatio; | ||||||||
228 | if (IsBigEndian) | ||||||||
229 | Chunk = NarrowingRatio - 1 - Chunk; | ||||||||
230 | |||||||||
231 | // Bail out if this is an FP vector to FP vector sequence. That would take | ||||||||
232 | // more instructions than we started with unless there is no shift, and it | ||||||||
233 | // may not be handled as well in the backend. | ||||||||
234 | bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); | ||||||||
235 | bool NeedDestBitcast = DestTy->isFloatingPointTy(); | ||||||||
236 | if (NeedSrcBitcast && NeedDestBitcast) | ||||||||
237 | return nullptr; | ||||||||
238 | |||||||||
239 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); | ||||||||
240 | unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); | ||||||||
241 | unsigned ShAmt = Chunk * DestWidth; | ||||||||
242 | |||||||||
243 | // TODO: This limitation is more strict than necessary. We could sum the | ||||||||
244 | // number of new instructions and subtract the number eliminated to know if | ||||||||
245 | // we can proceed. | ||||||||
246 | if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) | ||||||||
247 | if (NeedSrcBitcast || NeedDestBitcast) | ||||||||
248 | return nullptr; | ||||||||
249 | |||||||||
250 | if (NeedSrcBitcast) { | ||||||||
251 | Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth); | ||||||||
252 | Scalar = Builder.CreateBitCast(Scalar, SrcIntTy); | ||||||||
253 | } | ||||||||
254 | |||||||||
255 | if (ShAmt) { | ||||||||
256 | // Bail out if we could end with more instructions than we started with. | ||||||||
257 | if (!Ext.getVectorOperand()->hasOneUse()) | ||||||||
258 | return nullptr; | ||||||||
259 | Scalar = Builder.CreateLShr(Scalar, ShAmt); | ||||||||
260 | } | ||||||||
261 | |||||||||
262 | if (NeedDestBitcast) { | ||||||||
263 | Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth); | ||||||||
264 | return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy); | ||||||||
265 | } | ||||||||
266 | return new TruncInst(Scalar, DestTy); | ||||||||
267 | } | ||||||||
268 | |||||||||
269 | return nullptr; | ||||||||
270 | } | ||||||||
271 | |||||||||
272 | /// Find elements of V demanded by UserInstr. | ||||||||
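/// For example (illustrative): if V is a <4 x i32> and UserInstr is
/// "extractelement <4 x i32> %V, i64 2", only element 2 is demanded (0b0100).
/// A shufflevector user demands exactly the lanes of V that its mask selects;
/// any other user conservatively demands all elements.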
273 | static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { | ||||||||
274 | unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
275 | |||||||||
276 | // Conservatively assume that all elements are needed. | ||||||||
277 | APInt UsedElts(APInt::getAllOnesValue(VWidth)); | ||||||||
278 | |||||||||
279 | switch (UserInstr->getOpcode()) { | ||||||||
280 | case Instruction::ExtractElement: { | ||||||||
281 | ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr); | ||||||||
282 | assert(EEI->getVectorOperand() == V); | ||||||||
283 | ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand()); | ||||||||
284 | if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { | ||||||||
285 | UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); | ||||||||
286 | } | ||||||||
287 | break; | ||||||||
288 | } | ||||||||
289 | case Instruction::ShuffleVector: { | ||||||||
290 | ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr); | ||||||||
291 | unsigned MaskNumElts = | ||||||||
292 | cast<FixedVectorType>(UserInstr->getType())->getNumElements(); | ||||||||
293 | |||||||||
294 | UsedElts = APInt(VWidth, 0); | ||||||||
295 | for (unsigned i = 0; i < MaskNumElts; i++) { | ||||||||
296 | unsigned MaskVal = Shuffle->getMaskValue(i); | ||||||||
297 | if (MaskVal == -1u || MaskVal >= 2 * VWidth) | ||||||||
298 | continue; | ||||||||
299 | if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) | ||||||||
300 | UsedElts.setBit(MaskVal); | ||||||||
301 | if (Shuffle->getOperand(1) == V && | ||||||||
302 | ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) | ||||||||
303 | UsedElts.setBit(MaskVal - VWidth); | ||||||||
304 | } | ||||||||
305 | break; | ||||||||
306 | } | ||||||||
307 | default: | ||||||||
308 | break; | ||||||||
309 | } | ||||||||
310 | return UsedElts; | ||||||||
311 | } | ||||||||
312 | |||||||||
313 | /// Find union of elements of V demanded by all its users. | ||||||||
314 | /// If it is known by querying findDemandedEltsBySingleUser that | ||||||||
315 | /// no user demands an element of V, then the corresponding bit | ||||||||
316 | /// remains unset in the returned value. | ||||||||
317 | static APInt findDemandedEltsByAllUsers(Value *V) { | ||||||||
318 | unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
319 | |||||||||
320 | APInt UnionUsedElts(VWidth, 0); | ||||||||
321 | for (const Use &U : V->uses()) { | ||||||||
322 | if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { | ||||||||
323 | UnionUsedElts |= findDemandedEltsBySingleUser(V, I); | ||||||||
324 | } else { | ||||||||
325 | UnionUsedElts = APInt::getAllOnesValue(VWidth); | ||||||||
326 | break; | ||||||||
327 | } | ||||||||
328 | |||||||||
329 | if (UnionUsedElts.isAllOnesValue()) | ||||||||
330 | break; | ||||||||
331 | } | ||||||||
332 | |||||||||
333 | return UnionUsedElts; | ||||||||
334 | } | ||||||||
335 | |||||||||
336 | Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { | ||||||||
337 | Value *SrcVec = EI.getVectorOperand(); | ||||||||
338 | Value *Index = EI.getIndexOperand(); | ||||||||
339 | if (Value *V = SimplifyExtractElementInst(SrcVec, Index, | ||||||||
| |||||||||
340 | SQ.getWithInstruction(&EI))) | ||||||||
341 | return replaceInstUsesWith(EI, V); | ||||||||
342 | |||||||||
343 | // If extracting a specified index from the vector, see if we can recursively | ||||||||
344 | // find a previously computed scalar that was inserted into the vector. | ||||||||
345 | auto *IndexC = dyn_cast<ConstantInt>(Index); | ||||||||
346 | if (IndexC) { | ||||||||
347 | ElementCount EC = EI.getVectorOperandType()->getElementCount(); | ||||||||
348 | unsigned NumElts = EC.getKnownMinValue(); | ||||||||
349 | |||||||||
350 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) { | ||||||||
351 | Intrinsic::ID IID = II->getIntrinsicID(); | ||||||||
352 | // Index needs to be lower than the minimum size of the vector, because | ||||||||
353 | // for scalable vector, the vector size is known at run time. | ||||||||
354 | if (IID == Intrinsic::experimental_stepvector && | ||||||||
355 | IndexC->getValue().ult(NumElts)) { | ||||||||
356 | Type *Ty = EI.getType(); | ||||||||
357 | unsigned BitWidth = Ty->getIntegerBitWidth(); | ||||||||
358 | Value *Idx; | ||||||||
359 | // Return index when its value does not exceed the allowed limit | ||||||||
360 | // for the element type of the vector, otherwise return undefined. | ||||||||
361 | if (IndexC->getValue().getActiveBits() <= BitWidth) | ||||||||
362 | Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); | ||||||||
363 | else | ||||||||
364 | Idx = UndefValue::get(Ty); | ||||||||
365 | return replaceInstUsesWith(EI, Idx); | ||||||||
366 | } | ||||||||
367 | } | ||||||||
368 | |||||||||
369 | // InstSimplify should handle cases where the index is invalid. | ||||||||
370 | // For fixed-length vector, it's invalid to extract out-of-range element. | ||||||||
371 | if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) | ||||||||
372 | return nullptr; | ||||||||
373 | |||||||||
374 | // This instruction only demands the single element from the input vector. | ||||||||
375 | // Skip for scalable type, the number of elements is unknown at | ||||||||
376 | // compile-time. | ||||||||
377 | if (!EC.isScalable() && NumElts != 1) { | ||||||||
378 | // If the input vector has a single use, simplify it based on this use | ||||||||
379 | // property. | ||||||||
380 | if (SrcVec->hasOneUse()) { | ||||||||
381 | APInt UndefElts(NumElts, 0); | ||||||||
382 | APInt DemandedElts(NumElts, 0); | ||||||||
383 | DemandedElts.setBit(IndexC->getZExtValue()); | ||||||||
384 | if (Value *V = | ||||||||
385 | SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) | ||||||||
386 | return replaceOperand(EI, 0, V); | ||||||||
387 | } else { | ||||||||
388 | // If the input vector has multiple uses, simplify it based on a union | ||||||||
389 | // of all elements used. | ||||||||
390 | APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); | ||||||||
391 | if (!DemandedElts.isAllOnesValue()) { | ||||||||
392 | APInt UndefElts(NumElts, 0); | ||||||||
393 | if (Value *V = SimplifyDemandedVectorElts( | ||||||||
394 | SrcVec, DemandedElts, UndefElts, 0 /* Depth */, | ||||||||
395 | true /* AllowMultipleUsers */)) { | ||||||||
396 | if (V != SrcVec) { | ||||||||
397 | SrcVec->replaceAllUsesWith(V); | ||||||||
398 | return &EI; | ||||||||
399 | } | ||||||||
400 | } | ||||||||
401 | } | ||||||||
402 | } | ||||||||
403 | } | ||||||||
404 | |||||||||
405 | if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) | ||||||||
406 | return I; | ||||||||
407 | |||||||||
408 | // If there's a vector PHI feeding a scalar use through this extractelement | ||||||||
409 | // instruction, try to scalarize the PHI. | ||||||||
410 | if (auto *Phi = dyn_cast<PHINode>(SrcVec)) | ||||||||
411 | if (Instruction *ScalarPHI = scalarizePHI(EI, Phi)) | ||||||||
412 | return ScalarPHI; | ||||||||
413 | } | ||||||||
414 | |||||||||
415 | // TODO come up with a n-ary matcher that subsumes both unary and | ||||||||
416 | // binary matchers. | ||||||||
417 | UnaryOperator *UO; | ||||||||
418 | if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) { | ||||||||
419 | // extelt (unop X), Index --> unop (extelt X, Index) | ||||||||
420 | Value *X = UO->getOperand(0); | ||||||||
421 | Value *E = Builder.CreateExtractElement(X, Index); | ||||||||
422 | return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO); | ||||||||
423 | } | ||||||||
424 | |||||||||
425 | BinaryOperator *BO; | ||||||||
426 | if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) { | ||||||||
427 | // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) | ||||||||
428 | Value *X = BO->getOperand(0), *Y = BO->getOperand(1); | ||||||||
429 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||||||
430 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||||||
431 | return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO); | ||||||||
432 | } | ||||||||
433 | |||||||||
434 | Value *X, *Y; | ||||||||
435 | CmpInst::Predicate Pred; | ||||||||
436 | if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) && | ||||||||
437 | cheapToScalarize(SrcVec, IndexC)) { | ||||||||
438 | // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) | ||||||||
439 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||||||
440 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||||||
441 | return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1); | ||||||||
442 | } | ||||||||
443 | |||||||||
444 | if (auto *I = dyn_cast<Instruction>(SrcVec)) { | ||||||||
445 | if (auto *IE = dyn_cast<InsertElementInst>(I)) { | ||||||||
446 | // Extracting the inserted element? | ||||||||
447 | if (IE->getOperand(2) == Index) | ||||||||
448 | return replaceInstUsesWith(EI, IE->getOperand(1)); | ||||||||
449 | // If the inserted and extracted elements are constants, they must not | ||||||||
450 | // be the same value, extract from the pre-inserted value instead. | ||||||||
451 | if (isa<Constant>(IE->getOperand(2)) && IndexC) | ||||||||
452 | return replaceOperand(EI, 0, IE->getOperand(0)); | ||||||||
453 | } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { | ||||||||
454 | auto *VecType = cast<VectorType>(GEP->getType()); | ||||||||
455 | ElementCount EC = VecType->getElementCount(); | ||||||||
456 | uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; | ||||||||
457 | if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { | ||||||||
458 | // Find out why we have a vector result - these are a few examples: | ||||||||
459 | // 1. We have a scalar pointer and a vector of indices, or | ||||||||
460 | // 2. We have a vector of pointers and a scalar index, or | ||||||||
461 | // 3. We have a vector of pointers and a vector of indices, etc. | ||||||||
462 | // Here we only consider combining when there is exactly one vector | ||||||||
463 | // operand, since the optimization is less obviously a win due to | ||||||||
464 | // needing more than one extractelement. | ||||||||
465 | |||||||||
466 | unsigned VectorOps = | ||||||||
467 | llvm::count_if(GEP->operands(), [](const Value *V) { | ||||||||
468 | return isa<VectorType>(V->getType()); | ||||||||
469 | }); | ||||||||
470 | if (VectorOps > 1) | ||||||||
471 | return nullptr; | ||||||||
472 | assert(VectorOps == 1 && "Expected exactly one vector GEP operand!"); | ||||||||
473 | |||||||||
474 | Value *NewPtr = GEP->getPointerOperand(); | ||||||||
475 | if (isa<VectorType>(NewPtr->getType())) | ||||||||
476 | NewPtr = Builder.CreateExtractElement(NewPtr, IndexC); | ||||||||
477 | |||||||||
478 | SmallVector<Value *> NewOps; | ||||||||
479 | for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { | ||||||||
480 | Value *Op = GEP->getOperand(I); | ||||||||
481 | if (isa<VectorType>(Op->getType())) | ||||||||
482 | NewOps.push_back(Builder.CreateExtractElement(Op, IndexC)); | ||||||||
483 | else | ||||||||
484 | NewOps.push_back(Op); | ||||||||
485 | } | ||||||||
486 | |||||||||
487 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( | ||||||||
488 | cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr, | ||||||||
489 | NewOps); | ||||||||
490 | NewGEP->setIsInBounds(GEP->isInBounds()); | ||||||||
491 | return NewGEP; | ||||||||
492 | } | ||||||||
493 | return nullptr; | ||||||||
494 | } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { | ||||||||
495 | // If this is extracting an element from a shufflevector, figure out where | ||||||||
496 | // it came from and extract from the appropriate input element instead. | ||||||||
497 | // Restrict the following transformation to fixed-length vector. | ||||||||
498 | if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) { | ||||||||
499 | int SrcIdx = | ||||||||
500 | SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue()); | ||||||||
501 | Value *Src; | ||||||||
502 | unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType()) | ||||||||
503 | ->getNumElements(); | ||||||||
504 | |||||||||
505 | if (SrcIdx < 0) | ||||||||
506 | return replaceInstUsesWith(EI, UndefValue::get(EI.getType())); | ||||||||
507 | if (SrcIdx < (int)LHSWidth) | ||||||||
508 | Src = SVI->getOperand(0); | ||||||||
509 | else { | ||||||||
510 | SrcIdx -= LHSWidth; | ||||||||
511 | Src = SVI->getOperand(1); | ||||||||
512 | } | ||||||||
513 | Type *Int32Ty = Type::getInt32Ty(EI.getContext()); | ||||||||
514 | return ExtractElementInst::Create( | ||||||||
515 | Src, ConstantInt::get(Int32Ty, SrcIdx, false)); | ||||||||
516 | } | ||||||||
517 | } else if (auto *CI = dyn_cast<CastInst>(I)) { | ||||||||
518 | // Canonicalize extractelement(cast) -> cast(extractelement). | ||||||||
519 | // Bitcasts can change the number of vector elements, and they cost | ||||||||
520 | // nothing. | ||||||||
521 | if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { | ||||||||
522 | Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); | ||||||||
523 | return CastInst::Create(CI->getOpcode(), EE, EI.getType()); | ||||||||
524 | } | ||||||||
525 | } | ||||||||
526 | } | ||||||||
527 | return nullptr; | ||||||||
528 | } | ||||||||
529 | |||||||||
530 | /// If V is a shuffle of values that ONLY returns elements from either LHS or | ||||||||
531 | /// RHS, return the shuffle mask and true. Otherwise, return false. | ||||||||
532 | static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, | ||||||||
533 | SmallVectorImpl<int> &Mask) { | ||||||||
534 | assert(LHS->getType() == RHS->getType() && | ||||||||
535 |        "Invalid CollectSingleShuffleElements"); | ||||||||
536 | unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
537 | |||||||||
538 | if (match(V, m_Undef())) { | ||||||||
539 | Mask.assign(NumElts, -1); | ||||||||
540 | return true; | ||||||||
541 | } | ||||||||
542 | |||||||||
543 | if (V == LHS) { | ||||||||
544 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
545 | Mask.push_back(i); | ||||||||
546 | return true; | ||||||||
547 | } | ||||||||
548 | |||||||||
549 | if (V == RHS) { | ||||||||
550 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
551 | Mask.push_back(i + NumElts); | ||||||||
552 | return true; | ||||||||
553 | } | ||||||||
554 | |||||||||
555 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||||||
556 | // If this is an insert of an extract from some other vector, include it. | ||||||||
557 | Value *VecOp = IEI->getOperand(0); | ||||||||
558 | Value *ScalarOp = IEI->getOperand(1); | ||||||||
559 | Value *IdxOp = IEI->getOperand(2); | ||||||||
560 | |||||||||
561 | if (!isa<ConstantInt>(IdxOp)) | ||||||||
562 | return false; | ||||||||
563 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||||||
564 | |||||||||
565 | if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. | ||||||||
566 | // We can handle this if the vector we are inserting into is | ||||||||
567 | // transitively ok. | ||||||||
568 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||||||
569 | // If so, update the mask to reflect the inserted undef. | ||||||||
570 | Mask[InsertedIdx] = -1; | ||||||||
571 | return true; | ||||||||
572 | } | ||||||||
573 | } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ | ||||||||
574 | if (isa<ConstantInt>(EI->getOperand(1))) { | ||||||||
575 | unsigned ExtractedIdx = | ||||||||
576 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||||||
577 | unsigned NumLHSElts = | ||||||||
578 | cast<FixedVectorType>(LHS->getType())->getNumElements(); | ||||||||
579 | |||||||||
580 | // This must be extracting from either LHS or RHS. | ||||||||
581 | if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { | ||||||||
582 | // We can handle this if the vector we are inserting into is | ||||||||
583 | // transitively ok. | ||||||||
584 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||||||
585 | // If so, update the mask to reflect the inserted value. | ||||||||
586 | if (EI->getOperand(0) == LHS) { | ||||||||
587 | Mask[InsertedIdx % NumElts] = ExtractedIdx; | ||||||||
588 | } else { | ||||||||
589 | assert(EI->getOperand(0) == RHS); | ||||||||
590 | Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts; | ||||||||
591 | } | ||||||||
592 | return true; | ||||||||
593 | } | ||||||||
594 | } | ||||||||
595 | } | ||||||||
596 | } | ||||||||
597 | } | ||||||||
598 | |||||||||
599 | return false; | ||||||||
600 | } | ||||||||
601 | |||||||||
602 | /// If we have insertion into a vector that is wider than the vector that we | ||||||||
603 | /// are extracting from, try to widen the source vector to allow a single | ||||||||
604 | /// shufflevector to replace one or more insert/extract pairs. | ||||||||
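/// For example (illustrative): when extracts from a <2 x float> feed inserts
/// into a <4 x float>, the source is widened with a shufflevector against a
/// poison <2 x float> using mask {0, 1, -1, -1}, and extracts in the same
/// block are redirected to the widened value so the insert/extract pair can
/// later fold into a single shuffle.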
605 | static void replaceExtractElements(InsertElementInst *InsElt, | ||||||||
606 | ExtractElementInst *ExtElt, | ||||||||
607 | InstCombinerImpl &IC) { | ||||||||
608 | auto *InsVecType = cast<FixedVectorType>(InsElt->getType()); | ||||||||
609 | auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType()); | ||||||||
610 | unsigned NumInsElts = InsVecType->getNumElements(); | ||||||||
611 | unsigned NumExtElts = ExtVecType->getNumElements(); | ||||||||
612 | |||||||||
613 | // The inserted-to vector must be wider than the extracted-from vector. | ||||||||
614 | if (InsVecType->getElementType() != ExtVecType->getElementType() || | ||||||||
615 | NumExtElts >= NumInsElts) | ||||||||
616 | return; | ||||||||
617 | |||||||||
618 | // Create a shuffle mask to widen the extended-from vector using poison | ||||||||
619 | // values. The mask selects all of the values of the original vector followed | ||||||||
620 | // by as many poison values as needed to create a vector of the same length | ||||||||
621 | // as the inserted-to vector. | ||||||||
622 | SmallVector<int, 16> ExtendMask; | ||||||||
623 | for (unsigned i = 0; i < NumExtElts; ++i) | ||||||||
624 | ExtendMask.push_back(i); | ||||||||
625 | for (unsigned i = NumExtElts; i < NumInsElts; ++i) | ||||||||
626 | ExtendMask.push_back(-1); | ||||||||
627 | |||||||||
628 | Value *ExtVecOp = ExtElt->getVectorOperand(); | ||||||||
629 | auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); | ||||||||
630 | BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||||||
631 | ? ExtVecOpInst->getParent() | ||||||||
632 | : ExtElt->getParent(); | ||||||||
633 | |||||||||
634 | // TODO: This restriction matches the basic block check below when creating | ||||||||
635 | // new extractelement instructions. If that limitation is removed, this one | ||||||||
636 | // could also be removed. But for now, we just bail out to ensure that we | ||||||||
637 | // will replace the extractelement instruction that is feeding our | ||||||||
638 | // insertelement instruction. This allows the insertelement to then be | ||||||||
639 | // replaced by a shufflevector. If the insertelement is not replaced, we can | ||||||||
640 | // induce infinite looping because there's an optimization for extractelement | ||||||||
641 | // that will delete our widening shuffle. This would trigger another attempt | ||||||||
642 | // here to create that shuffle, and we spin forever. | ||||||||
643 | if (InsertionBlock != InsElt->getParent()) | ||||||||
644 | return; | ||||||||
645 | |||||||||
646 | // TODO: This restriction matches the check in visitInsertElementInst() and | ||||||||
647 | // prevents an infinite loop caused by not turning the extract/insert pair | ||||||||
648 | // into a shuffle. We really should not need either check, but we're lacking | ||||||||
649 | // folds for shufflevectors because we're afraid to generate shuffle masks | ||||||||
650 | // that the backend can't handle. | ||||||||
651 | if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back())) | ||||||||
652 | return; | ||||||||
653 | |||||||||
654 | auto *WideVec = | ||||||||
655 | new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask); | ||||||||
656 | |||||||||
657 | // Insert the new shuffle after the vector operand of the extract is defined | ||||||||
658 | // (as long as it's not a PHI) or at the start of the basic block of the | ||||||||
659 | // extract, so any subsequent extracts in the same basic block can use it. | ||||||||
660 | // TODO: Insert before the earliest ExtractElementInst that is replaced. | ||||||||
661 | if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||||||
662 | WideVec->insertAfter(ExtVecOpInst); | ||||||||
663 | else | ||||||||
664 | IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); | ||||||||
665 | |||||||||
666 | // Replace extracts from the original narrow vector with extracts from the new | ||||||||
667 | // wide vector. | ||||||||
668 | for (User *U : ExtVecOp->users()) { | ||||||||
669 | ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); | ||||||||
670 | if (!OldExt || OldExt->getParent() != WideVec->getParent()) | ||||||||
671 | continue; | ||||||||
672 | auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); | ||||||||
673 | NewExt->insertAfter(OldExt); | ||||||||
674 | IC.replaceInstUsesWith(*OldExt, NewExt); | ||||||||
675 | } | ||||||||
676 | } | ||||||||
677 | |||||||||
678 | /// We are building a shuffle to create V, which is a sequence of insertelement, | ||||||||
679 | /// extractelement pairs. If PermittedRHS is set, then we must either use it or | ||||||||
680 | /// not rely on the second vector source. Return a std::pair containing the | ||||||||
681 | /// left and right vectors of the proposed shuffle (or 0), and set the Mask | ||||||||
682 | /// parameter as required. | ||||||||
683 | /// | ||||||||
684 | /// Note: we intentionally don't try to fold earlier shuffles since they have | ||||||||
685 | /// often been chosen carefully to be efficiently implementable on the target. | ||||||||
686 | using ShuffleOps = std::pair<Value *, Value *>; | ||||||||
687 | |||||||||
688 | static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, | ||||||||
689 | Value *PermittedRHS, | ||||||||
690 | InstCombinerImpl &IC) { | ||||||||
691 | assert(V->getType()->isVectorTy() && "Invalid shuffle!"); | ||||||||
692 | unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
693 | |||||||||
694 | if (match(V, m_Undef())) { | ||||||||
695 | Mask.assign(NumElts, -1); | ||||||||
696 | return std::make_pair( | ||||||||
697 | PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); | ||||||||
698 | } | ||||||||
699 | |||||||||
700 | if (isa<ConstantAggregateZero>(V)) { | ||||||||
701 | Mask.assign(NumElts, 0); | ||||||||
702 | return std::make_pair(V, nullptr); | ||||||||
703 | } | ||||||||
704 | |||||||||
705 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||||||
706 | // If this is an insert of an extract from some other vector, include it. | ||||||||
707 | Value *VecOp = IEI->getOperand(0); | ||||||||
708 | Value *ScalarOp = IEI->getOperand(1); | ||||||||
709 | Value *IdxOp = IEI->getOperand(2); | ||||||||
710 | |||||||||
711 | if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { | ||||||||
712 | if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { | ||||||||
713 | unsigned ExtractedIdx = | ||||||||
714 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||||||
715 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||||||
716 | |||||||||
717 | // Either the extracted from or inserted into vector must be RHSVec, | ||||||||
718 | // otherwise we'd end up with a shuffle of three inputs. | ||||||||
719 | if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { | ||||||||
720 | Value *RHS = EI->getOperand(0); | ||||||||
721 | ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); | ||||||||
722 | assert(LR.second == nullptr || LR.second == RHS); | ||||||||
723 | |||||||||
724 | if (LR.first->getType() != RHS->getType()) { | ||||||||
725 | // Although we are giving up for now, see if we can create extracts | ||||||||
726 | // that match the inserts for another round of combining. | ||||||||
727 | replaceExtractElements(IEI, EI, IC); | ||||||||
728 | |||||||||
729 | // We tried our best, but we can't find anything compatible with RHS | ||||||||
730 | // further up the chain. Return a trivial shuffle. | ||||||||
731 | for (unsigned i = 0; i < NumElts; ++i) | ||||||||
732 | Mask[i] = i; | ||||||||
733 | return std::make_pair(V, nullptr); | ||||||||
734 | } | ||||||||
735 | |||||||||
736 | unsigned NumLHSElts = | ||||||||
737 | cast<FixedVectorType>(RHS->getType())->getNumElements(); | ||||||||
738 | Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx; | ||||||||
739 | return std::make_pair(LR.first, RHS); | ||||||||
740 | } | ||||||||
741 | |||||||||
742 | if (VecOp == PermittedRHS) { | ||||||||
743 | // We've gone as far as we can: anything on the other side of the | ||||||||
744 | // extractelement will already have been converted into a shuffle. | ||||||||
745 | unsigned NumLHSElts = | ||||||||
746 | cast<FixedVectorType>(EI->getOperand(0)->getType()) | ||||||||
747 | ->getNumElements(); | ||||||||
748 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
749 | Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i); | ||||||||
750 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||||||
751 | } | ||||||||
752 | |||||||||
753 | // If this insertelement is a chain that comes from exactly these two | ||||||||
754 | // vectors, return the vector and the effective shuffle. | ||||||||
755 | if (EI->getOperand(0)->getType() == PermittedRHS->getType() && | ||||||||
756 | collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, | ||||||||
757 | Mask)) | ||||||||
758 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||||||
759 | } | ||||||||
760 | } | ||||||||
761 | } | ||||||||
762 | |||||||||
763 | // Otherwise, we can't do anything fancy. Return an identity vector. | ||||||||
764 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
765 | Mask.push_back(i); | ||||||||
766 | return std::make_pair(V, nullptr); | ||||||||
767 | } | ||||||||
768 | |||||||||
769 | /// Look for chain of insertvalue's that fully define an aggregate, and trace | ||||||||
770 | /// back the values inserted, and see if they were all extractvalue'd from | ||||||||
771 | /// the same source aggregate from the exact same element indexes. | ||||||||
772 | /// If they were, just reuse the source aggregate. | ||||||||
773 | /// This potentially deals with PHI indirections. | ||||||||
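/// For example (illustrative IR), with %agg of type { i8, i32 }:
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// rebuilds %agg element by element, so %i1 can be replaced by %agg itself
/// (or, with PHI translation, by a PHI of the per-predecessor source
/// aggregates).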
774 | Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( | ||||||||
775 | InsertValueInst &OrigIVI) { | ||||||||
776 | Type *AggTy = OrigIVI.getType(); | ||||||||
777 | unsigned NumAggElts; | ||||||||
778 | switch (AggTy->getTypeID()) { | ||||||||
779 | case Type::StructTyID: | ||||||||
780 | NumAggElts = AggTy->getStructNumElements(); | ||||||||
781 | break; | ||||||||
782 | case Type::ArrayTyID: | ||||||||
783 | NumAggElts = AggTy->getArrayNumElements(); | ||||||||
784 | break; | ||||||||
785 | default: | ||||||||
786 | llvm_unreachable("Unhandled aggregate type?"); | ||||||||
787 | } | ||||||||
788 | |||||||||
789 | // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able | ||||||||
790 | // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), | ||||||||
791 | // FIXME: any interesting patterns to be caught with larger limit? | ||||||||
792 | assert(NumAggElts > 0 && "Aggregate should have elements."); | ||||||||
793 | if (NumAggElts > 2) | ||||||||
794 | return nullptr; | ||||||||
795 | |||||||||
796 | static constexpr auto NotFound = None; | ||||||||
797 | static constexpr auto FoundMismatch = nullptr; | ||||||||
798 | |||||||||
799 | // Try to find a value of each element of an aggregate. | ||||||||
800 | // FIXME: deal with more complex, not one-dimensional, aggregate types | ||||||||
801 | SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); | ||||||||
802 | |||||||||
803 | // Do we know values for each element of the aggregate? | ||||||||
804 | auto KnowAllElts = [&AggElts]() { | ||||||||
805 | return all_of(AggElts, | ||||||||
806 | [](Optional<Instruction *> Elt) { return Elt != NotFound; }); | ||||||||
807 | }; | ||||||||
808 | |||||||||
809 | int Depth = 0; | ||||||||
810 | |||||||||
811 | // Arbitrary `insertvalue` visitation depth limit. Let's be okay with | ||||||||
812 | // every element being overwritten twice, which should never happen. | ||||||||
813 | static const int DepthLimit = 2 * NumAggElts; | ||||||||
814 | |||||||||
815 | // Recurse up the chain of `insertvalue` aggregate operands until either we've | ||||||||
816 | // reconstructed full initializer or can't visit any more `insertvalue`'s. | ||||||||
817 | for (InsertValueInst *CurrIVI = &OrigIVI; | ||||||||
818 | Depth < DepthLimit && CurrIVI && !KnowAllElts(); | ||||||||
819 | CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), | ||||||||
820 | ++Depth) { | ||||||||
821 | auto *InsertedValue = | ||||||||
822 | dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand()); | ||||||||
823 | if (!InsertedValue) | ||||||||
824 | return nullptr; // Inserted value must be produced by an instruction. | ||||||||
825 | |||||||||
826 | ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); | ||||||||
827 | |||||||||
828 | // Don't bother with more than single-level aggregates. | ||||||||
829 | if (Indices.size() != 1) | ||||||||
830 | return nullptr; // FIXME: deal with more complex aggregates? | ||||||||
831 | |||||||||
832 | // Now, we may have already previously recorded the value for this element | ||||||||
833 | // of an aggregate. If we did, that means the CurrIVI will later be | ||||||||
834 | // overwritten with the already-recorded value. But if not, let's record it! | ||||||||
835 | Optional<Instruction *> &Elt = AggElts[Indices.front()]; | ||||||||
836 | Elt = Elt.getValueOr(InsertedValue); | ||||||||
837 | |||||||||
838 | // FIXME: should we handle chain-terminating undef base operand? | ||||||||
839 | } | ||||||||
840 | |||||||||
841 | // Was that sufficient to deduce the full initializer for the aggregate? | ||||||||
842 | if (!KnowAllElts()) | ||||||||
843 | return nullptr; // Give up then. | ||||||||
844 | |||||||||
845 | // We now want to find the source[s] of the aggregate elements we've found. | ||||||||
846 | // And with "source" we mean the original aggregate[s] from which | ||||||||
847 | // the inserted elements were extracted. This may require PHI translation. | ||||||||
848 | |||||||||
849 | enum class AggregateDescription { | ||||||||
850 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
851 | /// not manage to find defining `extractvalue` instruction to analyze. | ||||||||
852 | NotFound, | ||||||||
853 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
854 | /// manage to find defining `extractvalue` instruction[s], and everything | ||||||||
855 | /// matched perfectly - aggregate type, element insertion/extraction index. | ||||||||
856 | Found, | ||||||||
857 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
858 | /// manage to find defining `extractvalue` instruction, but there was | ||||||||
859 | /// a mismatch: either the source type from which the extraction was didn't | ||||||||
860 | /// match the aggregate type into which the insertion was, | ||||||||
861 | /// or the extraction/insertion channels mismatched, | ||||||||
862 | /// or different elements had different source aggregates. | ||||||||
863 | FoundMismatch | ||||||||
864 | }; | ||||||||
865 | auto Describe = [](Optional<Value *> SourceAggregate) { | ||||||||
866 | if (SourceAggregate == NotFound) | ||||||||
867 | return AggregateDescription::NotFound; | ||||||||
868 | if (*SourceAggregate == FoundMismatch) | ||||||||
869 | return AggregateDescription::FoundMismatch; | ||||||||
870 | return AggregateDescription::Found; | ||||||||
871 | }; | ||||||||
872 | |||||||||
873 | // Given the value \p Elt that was being inserted into element \p EltIdx of an | ||||||||
874 | // aggregate AggTy, see if \p Elt was originally defined by an | ||||||||
875 | // appropriate extractvalue (same element index, same aggregate type). | ||||||||
876 | // If found, return the source aggregate from which the extraction was. | ||||||||
877 | // If \p PredBB is provided, does PHI translation of an \p Elt first. | ||||||||
878 | auto FindSourceAggregate = | ||||||||
879 | [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, | ||||||||
880 | Optional<BasicBlock *> PredBB) -> Optional<Value *> { | ||||||||
881 | // For now(?), only deal with, at most, a single level of PHI indirection. | ||||||||
882 | if (UseBB && PredBB) | ||||||||
883 | Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB)); | ||||||||
884 | // FIXME: deal with multiple levels of PHI indirection? | ||||||||
885 | |||||||||
886 | // Did we find an extraction? | ||||||||
887 | auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt); | ||||||||
888 | if (!EVI) | ||||||||
889 | return NotFound; | ||||||||
890 | |||||||||
891 | Value *SourceAggregate = EVI->getAggregateOperand(); | ||||||||
892 | |||||||||
893 | // Is the extraction from the same type into which the insertion was? | ||||||||
894 | if (SourceAggregate->getType() != AggTy) | ||||||||
895 | return FoundMismatch; | ||||||||
896 | // And the element index doesn't change between extraction and insertion? | ||||||||
897 | if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) | ||||||||
898 | return FoundMismatch; | ||||||||
899 | |||||||||
900 | return SourceAggregate; // AggregateDescription::Found | ||||||||
901 | }; | ||||||||
902 | |||||||||
903 | // Given elements AggElts that were constructing an aggregate OrigIVI, | ||||||||
904 | // see if we can find appropriate source aggregate for each of the elements, | ||||||||
905 | // and see it's the same aggregate for each element. If so, return it. | ||||||||
906 | auto FindCommonSourceAggregate = | ||||||||
907 | [&](Optional<BasicBlock *> UseBB, | ||||||||
908 | Optional<BasicBlock *> PredBB) -> Optional<Value *> { | ||||||||
909 | Optional<Value *> SourceAggregate; | ||||||||
910 | |||||||||
911 | for (auto I : enumerate(AggElts)) { | ||||||||
912 | assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && | ||||||||
913 |        "We don't store nullptr in SourceAggregate!"); | ||||||||
914 | assert((Describe(SourceAggregate) == AggregateDescription::Found) == | ||||||||
915 |        (I.index() != 0) && | ||||||||
916 |        "SourceAggregate should be valid after the first element,"); | ||||||||
917 | |||||||||
918 | // For this element, is there a plausible source aggregate? | ||||||||
919 | // FIXME: we could special-case undef element, IFF we know that in the | ||||||||
920 | // source aggregate said element isn't poison. | ||||||||
921 | Optional<Value *> SourceAggregateForElement = | ||||||||
922 | FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); | ||||||||
923 | |||||||||
924 | // Okay, what have we found? Does that correlate with previous findings? | ||||||||
925 | |||||||||
926 | // Regardless of whether or not we have previously found source | ||||||||
927 | // aggregate for previous elements (if any), if we didn't find one for | ||||||||
928 | // this element, passthrough whatever we have just found. | ||||||||
929 | if (Describe(SourceAggregateForElement) != AggregateDescription::Found) | ||||||||
930 | return SourceAggregateForElement; | ||||||||
931 | |||||||||
932 | // Okay, we have found source aggregate for this element. | ||||||||
933 | // Let's see what we already know from previous elements, if any. | ||||||||
934 | switch (Describe(SourceAggregate)) { | ||||||||
935 | case AggregateDescription::NotFound: | ||||||||
936 | // This is apparently the first element that we have examined. | ||||||||
937 | SourceAggregate = SourceAggregateForElement; // Record the aggregate! | ||||||||
938 | continue; // Great, now look at next element. | ||||||||
939 | case AggregateDescription::Found: | ||||||||
940 | // We have previously already successfully examined other elements. | ||||||||
941 | // Is this the same source aggregate we've found for other elements? | ||||||||
942 | if (*SourceAggregateForElement != *SourceAggregate) | ||||||||
943 | return FoundMismatch; | ||||||||
944 | continue; // Still the same aggregate, look at next element. | ||||||||
945 | case AggregateDescription::FoundMismatch: | ||||||||
946 | llvm_unreachable("Can't happen. We would have early-exited then."); | ||||||||
947 | }; | ||||||||
948 | } | ||||||||
949 | |||||||||
950 | assert(Describe(SourceAggregate) == AggregateDescription::Found && | ||||||||
951 |        "Must be a valid Value"); | ||||||||
952 | return *SourceAggregate; | ||||||||
953 | }; | ||||||||
954 | |||||||||
955 | Optional<Value *> SourceAggregate; | ||||||||
956 | |||||||||
957 | // Can we find the source aggregate without looking at predecessors? | ||||||||
958 | SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None); | ||||||||
959 | if (Describe(SourceAggregate) != AggregateDescription::NotFound) { | ||||||||
960 | if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) | ||||||||
961 | return nullptr; // Conflicting source aggregates! | ||||||||
962 | ++NumAggregateReconstructionsSimplified; | ||||||||
963 | return replaceInstUsesWith(OrigIVI, *SourceAggregate); | ||||||||
964 | } | ||||||||
965 | |||||||||
966 | // Okay, apparently we need to look at predecessors. | ||||||||
967 | |||||||||
968 | // We should be smart about picking the "use" basic block, which will be the | ||||||||
969 | // merge point for aggregate, where we'll insert the final PHI that will be | ||||||||
970 | // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. | ||||||||
971 | // We should look in which blocks each of the AggElts is being defined, | ||||||||
972 | // they all should be defined in the same basic block. | ||||||||
973 | BasicBlock *UseBB = nullptr; | ||||||||
974 | |||||||||
975 | for (const Optional<Instruction *> &I : AggElts) { | ||||||||
976 | BasicBlock *BB = (*I)->getParent(); | ||||||||
977 | // If it's the first instruction we've encountered, record the basic block. | ||||||||
978 | if (!UseBB) { | ||||||||
979 | UseBB = BB; | ||||||||
980 | continue; | ||||||||
981 | } | ||||||||
982 | // Otherwise, this must be the same basic block we've seen previously. | ||||||||
983 | if (UseBB != BB) | ||||||||
984 | return nullptr; | ||||||||
985 | } | ||||||||
986 | |||||||||
987 | // If *all* of the elements are basic-block-independent, meaning they are | ||||||||
988 | // either function arguments, or constant expressions, then if we didn't | ||||||||
989 | // handle them without predecessor-aware handling, we won't handle them now. | ||||||||
990 | if (!UseBB) | ||||||||
991 | return nullptr; | ||||||||
992 | |||||||||
993 | // If we didn't manage to find source aggregate without looking at | ||||||||
994 | // predecessors, and there are no predecessors to look at, then we're done. | ||||||||
995 | if (pred_empty(UseBB)) | ||||||||
996 | return nullptr; | ||||||||
997 | |||||||||
998 | // Arbitrary predecessor count limit. | ||||||||
999 | static const int PredCountLimit = 64; | ||||||||
1000 | |||||||||
1001 | // Cache the (non-uniqified!) list of predecessors in a vector, | ||||||||
1002 | // checking the limit at the same time for efficiency. | ||||||||
1003 | SmallVector<BasicBlock *, 4> Preds; // May have duplicates! | ||||||||
1004 | for (BasicBlock *Pred : predecessors(UseBB)) { | ||||||||
1005 | // Don't bother if there are too many predecessors. | ||||||||
1006 | if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? | ||||||||
1007 | return nullptr; | ||||||||
1008 | Preds.emplace_back(Pred); | ||||||||
1009 | } | ||||||||
1010 | |||||||||
1011 | // For each predecessor, what is the source aggregate, | ||||||||
1012 | // from which all the elements were originally extracted from? | ||||||||
1013 | // Note that we want for the map to have stable iteration order! | ||||||||
1014 | SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates; | ||||||||
1015 | for (BasicBlock *Pred : Preds) { | ||||||||
1016 | std::pair<decltype(SourceAggregates)::iterator, bool> IV = | ||||||||
1017 | SourceAggregates.insert({Pred, nullptr}); | ||||||||
1018 | // Did we already evaluate this predecessor? | ||||||||
1019 | if (!IV.second) | ||||||||
1020 | continue; | ||||||||
1021 | |||||||||
1022 | // Let's hope that when coming from predecessor Pred, all elements of the | ||||||||
1023 | // aggregate produced by OrigIVI must have been originally extracted from | ||||||||
1024 | // the same aggregate. Is that so? Can we find said original aggregate? | ||||||||
1025 | SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); | ||||||||
1026 | if (Describe(SourceAggregate) != AggregateDescription::Found) | ||||||||
1027 | return nullptr; // Give up. | ||||||||
1028 | IV.first->second = *SourceAggregate; | ||||||||
1029 | } | ||||||||
1030 | |||||||||
1031 | // All good! Now we just need to thread the source aggregates here. | ||||||||
1032 | // Note that we have to insert the new PHI here, ourselves, because we can't | ||||||||
1033 | // rely on InstCombinerImpl::run() inserting it into the right basic block. | ||||||||
1034 | // Note that the same block can be a predecessor more than once, | ||||||||
1035 | // and we need to preserve that invariant for the PHI node. | ||||||||
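// Illustrative sketch (not from the original source; block and value names are
// hypothetical): for a merge block with two predecessors, the PHI built below
// would look roughly like
//   %agg.merged = phi { i8, i32 } [ %src.left, %bb.left ], [ %src.right, %bb.right ]
// where %src.left and %src.right are the per-predecessor source aggregates.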
1036 | BuilderTy::InsertPointGuard Guard(Builder); | ||||||||
1037 | Builder.SetInsertPoint(UseBB->getFirstNonPHI()); | ||||||||
1038 | auto *PHI = | ||||||||
1039 | Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); | ||||||||
1040 | for (BasicBlock *Pred : Preds) | ||||||||
1041 | PHI->addIncoming(SourceAggregates[Pred], Pred); | ||||||||
1042 | |||||||||
1043 | ++NumAggregateReconstructionsSimplified; | ||||||||
1044 | return replaceInstUsesWith(OrigIVI, PHI); | ||||||||
1045 | } | ||||||||
1046 | |||||||||
1047 | /// Try to find redundant insertvalue instructions, like the following ones: | ||||||||
1048 | /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 | ||||||||
1049 | /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 | ||||||||
1050 | /// Here the second instruction inserts values at the same indices as the | ||||||||
1051 | /// first one, making the first one redundant. | ||||||||
1052 | /// It should be transformed to: | ||||||||
1053 | /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 | ||||||||
1054 | Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { | ||||||||
1055 | bool IsRedundant = false; | ||||||||
1056 | ArrayRef<unsigned int> FirstIndices = I.getIndices(); | ||||||||
1057 | |||||||||
1058 | // If there is a chain of insertvalue instructions (each of them except the | ||||||||
1059 | // last one has only one use and it's another insertvalue insn from this | ||||||||
1060 | // chain), check if any of the 'children' uses the same indices as the first | ||||||||
1061 | // instruction. In this case, the first one is redundant. | ||||||||
1062 | Value *V = &I; | ||||||||
1063 | unsigned Depth = 0; | ||||||||
1064 | while (V->hasOneUse() && Depth < 10) { | ||||||||
1065 | User *U = V->user_back(); | ||||||||
1066 | auto UserInsInst = dyn_cast<InsertValueInst>(U); | ||||||||
1067 | if (!UserInsInst || U->getOperand(0) != V) | ||||||||
1068 | break; | ||||||||
1069 | if (UserInsInst->getIndices() == FirstIndices) { | ||||||||
1070 | IsRedundant = true; | ||||||||
1071 | break; | ||||||||
1072 | } | ||||||||
1073 | V = UserInsInst; | ||||||||
1074 | Depth++; | ||||||||
1075 | } | ||||||||
1076 | |||||||||
1077 | if (IsRedundant) | ||||||||
1078 | return replaceInstUsesWith(I, I.getOperand(0)); | ||||||||
1079 | |||||||||
1080 | if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) | ||||||||
1081 | return NewI; | ||||||||
1082 | |||||||||
1083 | return nullptr; | ||||||||
1084 | } | ||||||||
1085 | |||||||||
1086 | static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { | ||||||||
1087 | // Cannot analyze a scalable vector type; the number of elements is not a | ||||||||
1088 | // compile-time constant. | ||||||||
1089 | if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType())) | ||||||||
1090 | return false; | ||||||||
1091 | |||||||||
1092 | int MaskSize = Shuf.getShuffleMask().size(); | ||||||||
1093 | int VecSize = | ||||||||
1094 | cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements(); | ||||||||
1095 | |||||||||
1096 | // A vector select does not change the size of the operands. | ||||||||
1097 | if (MaskSize != VecSize) | ||||||||
1098 | return false; | ||||||||
1099 | |||||||||
1100 | // Each mask element must be undefined or choose a vector element from one of | ||||||||
1101 | // the source operands without crossing vector lanes. | ||||||||
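// For example (an illustrative mask, not from the original comments): with two
// <4 x i32> operands, the mask <0, 5, 2, 7> is select-equivalent because lanes
// 0 and 2 come from the first operand and lanes 1 and 3 (elements 5 and 7,
// i.e. 1 + VecSize and 3 + VecSize) come from the second operand, all in-lane.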
1102 | for (int i = 0; i != MaskSize; ++i) { | ||||||||
1103 | int Elt = Shuf.getMaskValue(i); | ||||||||
1104 | if (Elt != -1 && Elt != i && Elt != i + VecSize) | ||||||||
1105 | return false; | ||||||||
1106 | } | ||||||||
1107 | |||||||||
1108 | return true; | ||||||||
1109 | } | ||||||||
1110 | |||||||||
1111 | /// Turn a chain of inserts that splats a value into an insert + shuffle: | ||||||||
1112 | /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> | ||||||||
1113 | /// shufflevector(insertelt(X, %k, 0), poison, zero) | ||||||||
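/// As an illustrative IR sketch (hypothetical values, assuming a <4 x i32>
/// result), a chain that writes %k into every lane becomes:
///   %ins = insertelement <4 x i32> %X, i32 %k, i32 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer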
1114 | static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { | ||||||||
1115 | // We are interested in the last insert in a chain. So if this insert has a | ||||||||
1116 | // single user and that user is an insert, bail. | ||||||||
1117 | if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back())) | ||||||||
1118 | return nullptr; | ||||||||
1119 | |||||||||
1120 | VectorType *VecTy = InsElt.getType(); | ||||||||
1121 | // Cannot handle a scalable vector type; the number of elements is not a | ||||||||
1122 | // compile-time constant. | ||||||||
1123 | if (isa<ScalableVectorType>(VecTy)) | ||||||||
1124 | return nullptr; | ||||||||
1125 | unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements(); | ||||||||
1126 | |||||||||
1127 | // Do not try to do this for a one-element vector, since that's a nop | ||||||||
1128 | // and would cause an infinite loop. | ||||||||
1129 | if (NumElements == 1) | ||||||||
1130 | return nullptr; | ||||||||
1131 | |||||||||
1132 | Value *SplatVal = InsElt.getOperand(1); | ||||||||
1133 | InsertElementInst *CurrIE = &InsElt; | ||||||||
1134 | SmallBitVector ElementPresent(NumElements, false); | ||||||||
1135 | InsertElementInst *FirstIE = nullptr; | ||||||||
1136 | |||||||||
1137 | // Walk the chain backwards, keeping track of which indices we inserted into, | ||||||||
1138 | // until we hit something that isn't an insert of the splatted value. | ||||||||
1139 | while (CurrIE) { | ||||||||
1140 | auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2)); | ||||||||
1141 | if (!Idx || CurrIE->getOperand(1) != SplatVal) | ||||||||
1142 | return nullptr; | ||||||||
1143 | |||||||||
1144 | auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); | ||||||||
1145 | // Check that none of the intermediate steps have any additional uses, | ||||||||
1146 | // except for the root insertelement instruction, which can be re-used if | ||||||||
1147 | // it inserts at position 0. | ||||||||
1148 | if (CurrIE != &InsElt && | ||||||||
1149 | (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) | ||||||||
1150 | return nullptr; | ||||||||
1151 | |||||||||
1152 | ElementPresent[Idx->getZExtValue()] = true; | ||||||||
1153 | FirstIE = CurrIE; | ||||||||
1154 | CurrIE = NextIE; | ||||||||
1155 | } | ||||||||
1156 | |||||||||
1157 | // If this is just a single insertelement (not a sequence), we are done. | ||||||||
1158 | if (FirstIE == &InsElt) | ||||||||
1159 | return nullptr; | ||||||||
1160 | |||||||||
1161 | // If we are not inserting into an undef vector, make sure we've seen an | ||||||||
1162 | // insert into every element. | ||||||||
1163 | // TODO: If the base vector is not undef, it might be better to create a splat | ||||||||
1164 | // and then a select-shuffle (blend) with the base vector. | ||||||||
1165 | if (!match(FirstIE->getOperand(0), m_Undef())) | ||||||||
1166 | if (!ElementPresent.all()) | ||||||||
1167 | return nullptr; | ||||||||
1168 | |||||||||
1169 | // Create the insert + shuffle. | ||||||||
1170 | Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); | ||||||||
1171 | PoisonValue *PoisonVec = PoisonValue::get(VecTy); | ||||||||
1172 | Constant *Zero = ConstantInt::get(Int32Ty, 0); | ||||||||
1173 | if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) | ||||||||
1174 | FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt); | ||||||||
1175 | |||||||||
1176 | // Splat from element 0, but replace absent elements with undef in the mask. | ||||||||
1177 | SmallVector<int, 16> Mask(NumElements, 0); | ||||||||
1178 | for (unsigned i = 0; i != NumElements; ++i) | ||||||||
1179 | if (!ElementPresent[i]) | ||||||||
1180 | Mask[i] = -1; | ||||||||
1181 | |||||||||
1182 | return new ShuffleVectorInst(FirstIE, PoisonVec, Mask); | ||||||||
1183 | } | ||||||||
1184 | |||||||||
1185 | /// Try to fold an insert element into an existing splat shuffle by changing | ||||||||
1186 | /// the shuffle's mask to include the index of this insert element. | ||||||||
1187 | static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { | ||||||||
1188 | // Check if the vector operand of this insert is a canonical splat shuffle. | ||||||||
1189 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||||||
1190 | if (!Shuf || !Shuf->isZeroEltSplat()) | ||||||||
1191 | return nullptr; | ||||||||
1192 | |||||||||
1193 | // Bail out early if the shuffle has a scalable type. The number of elements | ||||||||
1194 | // in the shuffle mask is unknown at compile time. | ||||||||
1195 | if (isa<ScalableVectorType>(Shuf->getType())) | ||||||||
1196 | return nullptr; | ||||||||
1197 | |||||||||
1198 | // Check for a constant insertion index. | ||||||||
1199 | uint64_t IdxC; | ||||||||
1200 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||||||
1201 | return nullptr; | ||||||||
1202 | |||||||||
1203 | // Check if the splat shuffle's input is the same as this insert's scalar op. | ||||||||
1204 | Value *X = InsElt.getOperand(1); | ||||||||
1205 | Value *Op0 = Shuf->getOperand(0); | ||||||||
1206 | if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt()))) | ||||||||
1207 | return nullptr; | ||||||||
1208 | |||||||||
1209 | // Replace the shuffle mask element at the index of this insert with a zero. | ||||||||
1210 | // For example: | ||||||||
1211 | // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1 | ||||||||
1212 | // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef> | ||||||||
1213 | unsigned NumMaskElts = | ||||||||
1214 | cast<FixedVectorType>(Shuf->getType())->getNumElements(); | ||||||||
1215 | SmallVector<int, 16> NewMask(NumMaskElts); | ||||||||
1216 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||||||
1217 | NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i); | ||||||||
1218 | |||||||||
1219 | return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); | ||||||||
1220 | } | ||||||||
1221 | |||||||||
1222 | /// Try to fold an extract+insert element into an existing identity shuffle by | ||||||||
1223 | /// changing the shuffle's mask to include the index of this insert element. | ||||||||
1224 | static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { | ||||||||
1225 | // Check if the vector operand of this insert is an identity shuffle. | ||||||||
1226 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||||||
1227 | if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) || | ||||||||
1228 | !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) | ||||||||
1229 | return nullptr; | ||||||||
1230 | |||||||||
1231 | // Bail out early if the shuffle has a scalable type. The number of elements | ||||||||
1232 | // in the shuffle mask is unknown at compile time. | ||||||||
1233 | if (isa<ScalableVectorType>(Shuf->getType())) | ||||||||
1234 | return nullptr; | ||||||||
1235 | |||||||||
1236 | // Check for a constant insertion index. | ||||||||
1237 | uint64_t IdxC; | ||||||||
1238 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||||||
1239 | return nullptr; | ||||||||
1240 | |||||||||
1241 | // Check if this insert's scalar op is extracted from the identity shuffle's | ||||||||
1242 | // input vector. | ||||||||
1243 | Value *Scalar = InsElt.getOperand(1); | ||||||||
1244 | Value *X = Shuf->getOperand(0); | ||||||||
1245 | if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC)))) | ||||||||
1246 | return nullptr; | ||||||||
1247 | |||||||||
1248 | // Replace the shuffle mask element at the index of this extract+insert with | ||||||||
1249 | // that same index value. | ||||||||
1250 | // For example: | ||||||||
1251 | // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' | ||||||||
1252 | unsigned NumMaskElts = | ||||||||
1253 | cast<FixedVectorType>(Shuf->getType())->getNumElements(); | ||||||||
1254 | SmallVector<int, 16> NewMask(NumMaskElts); | ||||||||
1255 | ArrayRef<int> OldMask = Shuf->getShuffleMask(); | ||||||||
1256 | for (unsigned i = 0; i != NumMaskElts; ++i) { | ||||||||
1257 | if (i != IdxC) { | ||||||||
1258 | // All mask elements besides the inserted element remain the same. | ||||||||
1259 | NewMask[i] = OldMask[i]; | ||||||||
1260 | } else if (OldMask[i] == (int)IdxC) { | ||||||||
1261 | // If the mask element was already set, there's nothing to do | ||||||||
1262 | // (demanded elements analysis may unset it later). | ||||||||
1263 | return nullptr; | ||||||||
1264 | } else { | ||||||||
1265 | assert(OldMask[i] == UndefMaskElem &&((void)0) | ||||||||
1266 | "Unexpected shuffle mask element for identity shuffle")((void)0); | ||||||||
1267 | NewMask[i] = IdxC; | ||||||||
1268 | } | ||||||||
1269 | } | ||||||||
1270 | |||||||||
1271 | return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); | ||||||||
1272 | } | ||||||||
1273 | |||||||||
1274 | /// If we have an insertelement instruction feeding into another insertelement | ||||||||
1275 | /// and the 2nd is inserting a constant into the vector, canonicalize that | ||||||||
1276 | /// constant insertion before the insertion of a variable: | ||||||||
1277 | /// | ||||||||
1278 | /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> | ||||||||
1279 | /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 | ||||||||
1280 | /// | ||||||||
1281 | /// This has the potential of eliminating the 2nd insertelement instruction | ||||||||
1282 | /// via constant folding of the scalar constant into a vector constant. | ||||||||
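/// As a hypothetical sketch of why this helps (values invented for
/// illustration): with X = zeroinitializer, ScalarC = 7, IdxC2 = 1,
///   insertelement (insertelement <2 x i32> zeroinitializer, i32 %y, i32 0), i32 7, i32 1
/// becomes
///   insertelement <2 x i32> <i32 0, i32 7>, i32 %y, i32 0
/// once the hoisted constant insert folds into a vector constant.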
1283 | static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, | ||||||||
1284 | InstCombiner::BuilderTy &Builder) { | ||||||||
1285 | auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0)); | ||||||||
1286 | if (!InsElt1 || !InsElt1->hasOneUse()) | ||||||||
1287 | return nullptr; | ||||||||
1288 | |||||||||
1289 | Value *X, *Y; | ||||||||
1290 | Constant *ScalarC; | ||||||||
1291 | ConstantInt *IdxC1, *IdxC2; | ||||||||
1292 | if (match(InsElt1->getOperand(0), m_Value(X)) && | ||||||||
1293 | match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) && | ||||||||
1294 | match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) && | ||||||||
1295 | match(InsElt2.getOperand(1), m_Constant(ScalarC)) && | ||||||||
1296 | match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) { | ||||||||
1297 | Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2); | ||||||||
1298 | return InsertElementInst::Create(NewInsElt1, Y, IdxC1); | ||||||||
1299 | } | ||||||||
1300 | |||||||||
1301 | return nullptr; | ||||||||
1302 | } | ||||||||
1303 | |||||||||
1304 | /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex | ||||||||
1305 | /// --> shufflevector X, CVec', Mask' | ||||||||
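/// An illustrative instance (made-up operands), in the same loose notation as
/// the comments below: inserting the constant 7 at index 2 into a select-like
/// shuffle of <4 x i32> %x,
///   inselt (shuf %x, <poison,5,poison,poison>, <0,5,2,3>), 7, 2
/// can be rewritten as
///   shuf %x, <poison,5,7,poison>, <0,5,6,3>
/// because the constant vector is always the shuffle's second operand.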
1306 | static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { | ||||||||
1307 | auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0)); | ||||||||
1308 | // Bail out if the parent has more than one use. In that case, we'd be | ||||||||
1309 | // replacing the insertelt with a shuffle, and that's not a clear win. | ||||||||
1310 | if (!Inst || !Inst->hasOneUse()) | ||||||||
1311 | return nullptr; | ||||||||
1312 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) { | ||||||||
1313 | // The shuffle must have a constant vector operand. The insertelt must have | ||||||||
1314 | // a constant scalar being inserted at a constant position in the vector. | ||||||||
1315 | Constant *ShufConstVec, *InsEltScalar; | ||||||||
1316 | uint64_t InsEltIndex; | ||||||||
1317 | if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || | ||||||||
1318 | !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || | ||||||||
1319 | !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) | ||||||||
1320 | return nullptr; | ||||||||
1321 | |||||||||
1322 | // Adding an element to an arbitrary shuffle could be expensive, but a | ||||||||
1323 | // shuffle that selects elements from vectors without crossing lanes is | ||||||||
1324 | // assumed cheap. | ||||||||
1325 | // If we're just adding a constant into that shuffle, it will still be | ||||||||
1326 | // cheap. | ||||||||
1327 | if (!isShuffleEquivalentToSelect(*Shuf)) | ||||||||
1328 | return nullptr; | ||||||||
1329 | |||||||||
1330 | // From the above 'select' check, we know that the mask has the same number | ||||||||
1331 | // of elements as the vector input operands. We also know that each constant | ||||||||
1332 | // input element is used in its lane and cannot be used more than once by | ||||||||
1333 | // the shuffle. Therefore, replace the constant in the shuffle's constant | ||||||||
1334 | // vector with the insertelt constant. Replace the constant in the shuffle's | ||||||||
1335 | // mask vector with the insertelt index plus the length of the vector | ||||||||
1336 | // (because the constant vector operand of a shuffle is always the 2nd | ||||||||
1337 | // operand). | ||||||||
1338 | ArrayRef<int> Mask = Shuf->getShuffleMask(); | ||||||||
1339 | unsigned NumElts = Mask.size(); | ||||||||
1340 | SmallVector<Constant *, 16> NewShufElts(NumElts); | ||||||||
1341 | SmallVector<int, 16> NewMaskElts(NumElts); | ||||||||
1342 | for (unsigned I = 0; I != NumElts; ++I) { | ||||||||
1343 | if (I == InsEltIndex) { | ||||||||
1344 | NewShufElts[I] = InsEltScalar; | ||||||||
1345 | NewMaskElts[I] = InsEltIndex + NumElts; | ||||||||
1346 | } else { | ||||||||
1347 | // Copy over the existing values. | ||||||||
1348 | NewShufElts[I] = ShufConstVec->getAggregateElement(I); | ||||||||
1349 | NewMaskElts[I] = Mask[I]; | ||||||||
1350 | } | ||||||||
1351 | } | ||||||||
1352 | |||||||||
1353 | // Create new operands for a shuffle that includes the constant of the | ||||||||
1354 | // original insertelt. The old shuffle will be dead now. | ||||||||
1355 | return new ShuffleVectorInst(Shuf->getOperand(0), | ||||||||
1356 | ConstantVector::get(NewShufElts), NewMaskElts); | ||||||||
1357 | } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) { | ||||||||
1358 | // Transform sequences of insertelement ops with constant data/indices into | ||||||||
1359 | // a single shuffle op. | ||||||||
1360 | // Cannot handle a scalable vector type; the number of elements needed to | ||||||||
1361 | // create the shuffle mask is not a compile-time constant. | ||||||||
1362 | if (isa<ScalableVectorType>(InsElt.getType())) | ||||||||
1363 | return nullptr; | ||||||||
1364 | unsigned NumElts = | ||||||||
1365 | cast<FixedVectorType>(InsElt.getType())->getNumElements(); | ||||||||
1366 | |||||||||
1367 | uint64_t InsertIdx[2]; | ||||||||
1368 | Constant *Val[2]; | ||||||||
1369 | if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || | ||||||||
1370 | !match(InsElt.getOperand(1), m_Constant(Val[0])) || | ||||||||
1371 | !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || | ||||||||
1372 | !match(IEI->getOperand(1), m_Constant(Val[1]))) | ||||||||
1373 | return nullptr; | ||||||||
1374 | SmallVector<Constant *, 16> Values(NumElts); | ||||||||
1375 | SmallVector<int, 16> Mask(NumElts); | ||||||||
1376 | auto ValI = std::begin(Val); | ||||||||
1377 | // Generate new constant vector and mask. | ||||||||
1378 | // We have 2 values/masks from the insertelements instructions. Insert them | ||||||||
1379 | // into new value/mask vectors. | ||||||||
1380 | for (uint64_t I : InsertIdx) { | ||||||||
1381 | if (!Values[I]) { | ||||||||
1382 | Values[I] = *ValI; | ||||||||
1383 | Mask[I] = NumElts + I; | ||||||||
1384 | } | ||||||||
1385 | ++ValI; | ||||||||
1386 | } | ||||||||
1387 | // Remaining values are filled with 'undef' values. | ||||||||
1388 | for (unsigned I = 0; I < NumElts; ++I) { | ||||||||
1389 | if (!Values[I]) { | ||||||||
1390 | Values[I] = UndefValue::get(InsElt.getType()->getElementType()); | ||||||||
1391 | Mask[I] = I; | ||||||||
1392 | } | ||||||||
1393 | } | ||||||||
1394 | // Create new operands for a shuffle that includes the constant of the | ||||||||
1395 | // original insertelt. | ||||||||
1396 | return new ShuffleVectorInst(IEI->getOperand(0), | ||||||||
1397 | ConstantVector::get(Values), Mask); | ||||||||
1398 | } | ||||||||
1399 | return nullptr; | ||||||||
1400 | } | ||||||||
1401 | |||||||||
1402 | Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { | ||||||||
1403 | Value *VecOp = IE.getOperand(0); | ||||||||
1404 | Value *ScalarOp = IE.getOperand(1); | ||||||||
1405 | Value *IdxOp = IE.getOperand(2); | ||||||||
1406 | |||||||||
1407 | if (auto *V = SimplifyInsertElementInst( | ||||||||
1408 | VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) | ||||||||
1409 | return replaceInstUsesWith(IE, V); | ||||||||
1410 | |||||||||
1411 | // If the scalar is bitcast and inserted into undef, do the insert in the | ||||||||
1412 | // source type followed by bitcast. | ||||||||
1413 | // TODO: Generalize for insert into any constant, not just undef? | ||||||||
1414 | Value *ScalarSrc; | ||||||||
1415 | if (match(VecOp, m_Undef()) && | ||||||||
1416 | match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && | ||||||||
1417 | (ScalarSrc->getType()->isIntegerTy() || | ||||||||
1418 | ScalarSrc->getType()->isFloatingPointTy())) { | ||||||||
1419 | // inselt undef, (bitcast ScalarSrc), IdxOp --> | ||||||||
1420 | // bitcast (inselt undef, ScalarSrc, IdxOp) | ||||||||
1421 | Type *ScalarTy = ScalarSrc->getType(); | ||||||||
1422 | Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); | ||||||||
1423 | UndefValue *NewUndef = UndefValue::get(VecTy); | ||||||||
1424 | Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); | ||||||||
1425 | return new BitCastInst(NewInsElt, IE.getType()); | ||||||||
1426 | } | ||||||||
1427 | |||||||||
1428 | // If the vector and scalar are both bitcast from the same element type, do | ||||||||
1429 | // the insert in that source type followed by bitcast. | ||||||||
1430 | Value *VecSrc; | ||||||||
1431 | if (match(VecOp, m_BitCast(m_Value(VecSrc))) && | ||||||||
1432 | match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && | ||||||||
1433 | (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && | ||||||||
1434 | VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() && | ||||||||
1435 | cast<VectorType>(VecSrc->getType())->getElementType() == | ||||||||
1436 | ScalarSrc->getType()) { | ||||||||
1437 | // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp --> | ||||||||
1438 | // bitcast (inselt VecSrc, ScalarSrc, IdxOp) | ||||||||
1439 | Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp); | ||||||||
1440 | return new BitCastInst(NewInsElt, IE.getType()); | ||||||||
1441 | } | ||||||||
1442 | |||||||||
1443 | // If the inserted element was extracted from some other fixed-length vector | ||||||||
1444 | // and both indices are valid constants, try to turn this into a shuffle. | ||||||||
1445 | // Cannot handle a scalable vector type; the number of elements needed to | ||||||||
1446 | // create the shuffle mask is not a compile-time constant. | ||||||||
1447 | uint64_t InsertedIdx, ExtractedIdx; | ||||||||
1448 | Value *ExtVecOp; | ||||||||
1449 | if (isa<FixedVectorType>(IE.getType()) && | ||||||||
1450 | match(IdxOp, m_ConstantInt(InsertedIdx)) && | ||||||||
1451 | match(ScalarOp, | ||||||||
1452 | m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) && | ||||||||
1453 | isa<FixedVectorType>(ExtVecOp->getType()) && | ||||||||
1454 | ExtractedIdx < | ||||||||
1455 | cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) { | ||||||||
1456 | // TODO: Looking at the user(s) to determine if this insert is a | ||||||||
1457 | // fold-to-shuffle opportunity does not match the usual instcombine | ||||||||
1458 | // constraints. We should decide if the transform is worthy based only | ||||||||
1459 | // on this instruction and its operands, but that may not work currently. | ||||||||
1460 | // | ||||||||
1461 | // Here, we are trying to avoid creating shuffles before reaching | ||||||||
1462 | // the end of a chain of extract-insert pairs. This is complicated because | ||||||||
1463 | // we do not generally form arbitrary shuffle masks in instcombine | ||||||||
1464 | // (because those may codegen poorly), but collectShuffleElements() does | ||||||||
1465 | // exactly that. | ||||||||
1466 | // | ||||||||
1467 | // The rules for determining what is an acceptable target-independent | ||||||||
1468 | // shuffle mask are fuzzy because they evolve based on the backend's | ||||||||
1469 | // capabilities and real-world impact. | ||||||||
1470 | auto isShuffleRootCandidate = [](InsertElementInst &Insert) { | ||||||||
1471 | if (!Insert.hasOneUse()) | ||||||||
1472 | return true; | ||||||||
1473 | auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back()); | ||||||||
1474 | if (!InsertUser) | ||||||||
1475 | return true; | ||||||||
1476 | return false; | ||||||||
1477 | }; | ||||||||
1478 | |||||||||
1479 | // Try to form a shuffle from a chain of extract-insert ops. | ||||||||
1480 | if (isShuffleRootCandidate(IE)) { | ||||||||
1481 | SmallVector<int, 16> Mask; | ||||||||
1482 | ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); | ||||||||
1483 | |||||||||
1484 | // The proposed shuffle may be trivial, in which case we shouldn't | ||||||||
1485 | // perform the combine. | ||||||||
1486 | if (LR.first != &IE && LR.second != &IE) { | ||||||||
1487 | // We now have a shuffle of LHS, RHS, Mask. | ||||||||
1488 | if (LR.second == nullptr) | ||||||||
1489 | LR.second = UndefValue::get(LR.first->getType()); | ||||||||
1490 | return new ShuffleVectorInst(LR.first, LR.second, Mask); | ||||||||
1491 | } | ||||||||
1492 | } | ||||||||
1493 | } | ||||||||
1494 | |||||||||
1495 | if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) { | ||||||||
1496 | unsigned VWidth = VecTy->getNumElements(); | ||||||||
1497 | APInt UndefElts(VWidth, 0); | ||||||||
1498 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||||||
1499 | if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { | ||||||||
1500 | if (V != &IE) | ||||||||
1501 | return replaceInstUsesWith(IE, V); | ||||||||
1502 | return &IE; | ||||||||
1503 | } | ||||||||
1504 | } | ||||||||
1505 | |||||||||
1506 | if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) | ||||||||
1507 | return Shuf; | ||||||||
1508 | |||||||||
1509 | if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) | ||||||||
1510 | return NewInsElt; | ||||||||
1511 | |||||||||
1512 | if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE)) | ||||||||
1513 | return Broadcast; | ||||||||
1514 | |||||||||
1515 | if (Instruction *Splat = foldInsEltIntoSplat(IE)) | ||||||||
1516 | return Splat; | ||||||||
1517 | |||||||||
1518 | if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) | ||||||||
1519 | return IdentityShuf; | ||||||||
1520 | |||||||||
1521 | return nullptr; | ||||||||
1522 | } | ||||||||
1523 | |||||||||
1524 | /// Return true if we can evaluate the specified expression tree if the vector | ||||||||
1525 | /// elements were shuffled in a different order. | ||||||||
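/// For instance (an invented example), (add %x, <i32 1, i32 2, i32 3, i32 4>)
/// can be evaluated with the reversed order <3, 2, 1, 0> by instead computing
/// add (shufflevector %x, poison, <3, 2, 1, 0>), <i32 4, i32 3, i32 2, i32 1>.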
1526 | static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, | ||||||||
1527 | unsigned Depth = 5) { | ||||||||
1528 | // We can always reorder the elements of a constant. | ||||||||
1529 | if (isa<Constant>(V)) | ||||||||
1530 | return true; | ||||||||
1531 | |||||||||
1532 | // We won't reorder vector arguments. No IPO here. | ||||||||
1533 | Instruction *I = dyn_cast<Instruction>(V); | ||||||||
1534 | if (!I) return false; | ||||||||
1535 | |||||||||
1536 | // Two users may expect different orders of the elements. Don't try it. | ||||||||
1537 | if (!I->hasOneUse()) | ||||||||
1538 | return false; | ||||||||
1539 | |||||||||
1540 | if (Depth == 0) return false; | ||||||||
1541 | |||||||||
1542 | switch (I->getOpcode()) { | ||||||||
1543 | case Instruction::UDiv: | ||||||||
1544 | case Instruction::SDiv: | ||||||||
1545 | case Instruction::URem: | ||||||||
1546 | case Instruction::SRem: | ||||||||
1547 | // Propagating an undefined shuffle mask element to integer div/rem is not | ||||||||
1548 | // allowed because those opcodes can create immediate undefined behavior | ||||||||
1549 | // from an undefined element in an operand. | ||||||||
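// For example (illustrative), an undef mask lane feeding the divisor of a
// udiv could resolve to 0 and introduce division by zero where the original
// (unshuffled) code had none.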
1550 | if (llvm::is_contained(Mask, -1)) | ||||||||
1551 | return false; | ||||||||
1552 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||||
1553 | case Instruction::Add: | ||||||||
1554 | case Instruction::FAdd: | ||||||||
1555 | case Instruction::Sub: | ||||||||
1556 | case Instruction::FSub: | ||||||||
1557 | case Instruction::Mul: | ||||||||
1558 | case Instruction::FMul: | ||||||||
1559 | case Instruction::FDiv: | ||||||||
1560 | case Instruction::FRem: | ||||||||
1561 | case Instruction::Shl: | ||||||||
1562 | case Instruction::LShr: | ||||||||
1563 | case Instruction::AShr: | ||||||||
1564 | case Instruction::And: | ||||||||
1565 | case Instruction::Or: | ||||||||
1566 | case Instruction::Xor: | ||||||||
1567 | case Instruction::ICmp: | ||||||||
1568 | case Instruction::FCmp: | ||||||||
1569 | case Instruction::Trunc: | ||||||||
1570 | case Instruction::ZExt: | ||||||||
1571 | case Instruction::SExt: | ||||||||
1572 | case Instruction::FPToUI: | ||||||||
1573 | case Instruction::FPToSI: | ||||||||
1574 | case Instruction::UIToFP: | ||||||||
1575 | case Instruction::SIToFP: | ||||||||
1576 | case Instruction::FPTrunc: | ||||||||
1577 | case Instruction::FPExt: | ||||||||
1578 | case Instruction::GetElementPtr: { | ||||||||
1579 | // Bail out if we would create longer vector ops. We could allow creating | ||||||||
1580 | // longer vector ops, but that may result in more expensive codegen. | ||||||||
1581 | Type *ITy = I->getType(); | ||||||||
1582 | if (ITy->isVectorTy() && | ||||||||
1583 | Mask.size() > cast<FixedVectorType>(ITy)->getNumElements()) | ||||||||
1584 | return false; | ||||||||
1585 | for (Value *Operand : I->operands()) { | ||||||||
1586 | if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) | ||||||||
1587 | return false; | ||||||||
1588 | } | ||||||||
1589 | return true; | ||||||||
1590 | } | ||||||||
1591 | case Instruction::InsertElement: { | ||||||||
1592 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); | ||||||||
1593 | if (!CI) return false; | ||||||||
1594 | int ElementNumber = CI->getLimitedValue(); | ||||||||
1595 | |||||||||
1596 | // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' | ||||||||
1597 | // can't put an element into multiple indices. | ||||||||
1598 | bool SeenOnce = false; | ||||||||
1599 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
1600 | if (Mask[i] == ElementNumber) { | ||||||||
1601 | if (SeenOnce) | ||||||||
1602 | return false; | ||||||||
1603 | SeenOnce = true; | ||||||||
1604 | } | ||||||||
1605 | } | ||||||||
1606 | return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1); | ||||||||
1607 | } | ||||||||
1608 | } | ||||||||
1609 | return false; | ||||||||
1610 | } | ||||||||
1611 | |||||||||
1612 | /// Rebuild a new instruction just like 'I' but with the new operands given. | ||||||||
1613 | /// In the event of a type mismatch, the types of the new operands take precedence. | ||||||||
1614 | static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { | ||||||||
1615 | // We don't want to use the IRBuilder here because we want the replacement | ||||||||
1616 | // instructions to appear next to 'I', not the builder's insertion point. | ||||||||
1617 | switch (I->getOpcode()) { | ||||||||
1618 | case Instruction::Add: | ||||||||
1619 | case Instruction::FAdd: | ||||||||
1620 | case Instruction::Sub: | ||||||||
1621 | case Instruction::FSub: | ||||||||
1622 | case Instruction::Mul: | ||||||||
1623 | case Instruction::FMul: | ||||||||
1624 | case Instruction::UDiv: | ||||||||
1625 | case Instruction::SDiv: | ||||||||
1626 | case Instruction::FDiv: | ||||||||
1627 | case Instruction::URem: | ||||||||
1628 | case Instruction::SRem: | ||||||||
1629 | case Instruction::FRem: | ||||||||
1630 | case Instruction::Shl: | ||||||||
1631 | case Instruction::LShr: | ||||||||
1632 | case Instruction::AShr: | ||||||||
1633 | case Instruction::And: | ||||||||
1634 | case Instruction::Or: | ||||||||
1635 | case Instruction::Xor: { | ||||||||
1636 | BinaryOperator *BO = cast<BinaryOperator>(I); | ||||||||
1637 | assert(NewOps.size() == 2 && "binary operator with #ops != 2")((void)0); | ||||||||
1638 | BinaryOperator *New = | ||||||||
1639 | BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), | ||||||||
1640 | NewOps[0], NewOps[1], "", BO); | ||||||||
1641 | if (isa<OverflowingBinaryOperator>(BO)) { | ||||||||
1642 | New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); | ||||||||
1643 | New->setHasNoSignedWrap(BO->hasNoSignedWrap()); | ||||||||
1644 | } | ||||||||
1645 | if (isa<PossiblyExactOperator>(BO)) { | ||||||||
1646 | New->setIsExact(BO->isExact()); | ||||||||
1647 | } | ||||||||
1648 | if (isa<FPMathOperator>(BO)) | ||||||||
1649 | New->copyFastMathFlags(I); | ||||||||
1650 | return New; | ||||||||
1651 | } | ||||||||
1652 | case Instruction::ICmp: | ||||||||
1653 | assert(NewOps.size() == 2 && "icmp with #ops != 2")((void)0); | ||||||||
1654 | return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), | ||||||||
1655 | NewOps[0], NewOps[1]); | ||||||||
1656 | case Instruction::FCmp: | ||||||||
1657 | assert(NewOps.size() == 2 && "fcmp with #ops != 2")((void)0); | ||||||||
1658 | return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), | ||||||||
1659 | NewOps[0], NewOps[1]); | ||||||||
1660 | case Instruction::Trunc: | ||||||||
1661 | case Instruction::ZExt: | ||||||||
1662 | case Instruction::SExt: | ||||||||
1663 | case Instruction::FPToUI: | ||||||||
1664 | case Instruction::FPToSI: | ||||||||
1665 | case Instruction::UIToFP: | ||||||||
1666 | case Instruction::SIToFP: | ||||||||
1667 | case Instruction::FPTrunc: | ||||||||
1668 | case Instruction::FPExt: { | ||||||||
1669 | // It's possible that the mask has a different number of elements from | ||||||||
1670 | // the original cast. We recompute the destination type to match the mask. | ||||||||
1671 | Type *DestTy = VectorType::get( | ||||||||
1672 | I->getType()->getScalarType(), | ||||||||
1673 | cast<VectorType>(NewOps[0]->getType())->getElementCount()); | ||||||||
1674 | assert(NewOps.size() == 1 && "cast with #ops != 1")((void)0); | ||||||||
1675 | return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, | ||||||||
1676 | "", I); | ||||||||
1677 | } | ||||||||
1678 | case Instruction::GetElementPtr: { | ||||||||
1679 | Value *Ptr = NewOps[0]; | ||||||||
1680 | ArrayRef<Value*> Idx = NewOps.slice(1); | ||||||||
1681 | GetElementPtrInst *GEP = GetElementPtrInst::Create( | ||||||||
1682 | cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); | ||||||||
1683 | GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); | ||||||||
1684 | return GEP; | ||||||||
1685 | } | ||||||||
1686 | } | ||||||||
1687 | llvm_unreachable("failed to rebuild vector instructions")__builtin_unreachable(); | ||||||||
1688 | } | ||||||||
1689 | |||||||||
1690 | static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { | ||||||||
1691 | // Mask.size() does not need to be equal to the number of vector elements. | ||||||||
1692 | |||||||||
1693 | assert(V->getType()->isVectorTy() && "can't reorder non-vector elements")((void)0); | ||||||||
1694 | Type *EltTy = V->getType()->getScalarType(); | ||||||||
1695 | Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); | ||||||||
1696 | if (match(V, m_Undef())) | ||||||||
1697 | return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); | ||||||||
1698 | |||||||||
1699 | if (isa<ConstantAggregateZero>(V)) | ||||||||
1700 | return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size())); | ||||||||
1701 | |||||||||
1702 | if (Constant *C = dyn_cast<Constant>(V)) | ||||||||
1703 | return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()), | ||||||||
1704 | Mask); | ||||||||
1705 | |||||||||
1706 | Instruction *I = cast<Instruction>(V); | ||||||||
1707 | switch (I->getOpcode()) { | ||||||||
1708 | case Instruction::Add: | ||||||||
1709 | case Instruction::FAdd: | ||||||||
1710 | case Instruction::Sub: | ||||||||
1711 | case Instruction::FSub: | ||||||||
1712 | case Instruction::Mul: | ||||||||
1713 | case Instruction::FMul: | ||||||||
1714 | case Instruction::UDiv: | ||||||||
1715 | case Instruction::SDiv: | ||||||||
1716 | case Instruction::FDiv: | ||||||||
1717 | case Instruction::URem: | ||||||||
1718 | case Instruction::SRem: | ||||||||
1719 | case Instruction::FRem: | ||||||||
1720 | case Instruction::Shl: | ||||||||
1721 | case Instruction::LShr: | ||||||||
1722 | case Instruction::AShr: | ||||||||
1723 | case Instruction::And: | ||||||||
1724 | case Instruction::Or: | ||||||||
1725 | case Instruction::Xor: | ||||||||
1726 | case Instruction::ICmp: | ||||||||
1727 | case Instruction::FCmp: | ||||||||
1728 | case Instruction::Trunc: | ||||||||
1729 | case Instruction::ZExt: | ||||||||
1730 | case Instruction::SExt: | ||||||||
1731 | case Instruction::FPToUI: | ||||||||
1732 | case Instruction::FPToSI: | ||||||||
1733 | case Instruction::UIToFP: | ||||||||
1734 | case Instruction::SIToFP: | ||||||||
1735 | case Instruction::FPTrunc: | ||||||||
1736 | case Instruction::FPExt: | ||||||||
1737 | case Instruction::Select: | ||||||||
1738 | case Instruction::GetElementPtr: { | ||||||||
1739 | SmallVector<Value*, 8> NewOps; | ||||||||
1740 | bool NeedsRebuild = | ||||||||
1741 | (Mask.size() != | ||||||||
1742 | cast<FixedVectorType>(I->getType())->getNumElements()); | ||||||||
1743 | for (int i = 0, e = I->getNumOperands(); i != e; ++i) { | ||||||||
1744 | Value *V; | ||||||||
1745 | // Recursively call evaluateInDifferentElementOrder on vector arguments | ||||||||
1746 | // as well. E.g. GetElementPtr may have scalar operands even if the | ||||||||
1747 | // return value is a vector, so we need to examine the operand type. | ||||||||
1748 | if (I->getOperand(i)->getType()->isVectorTy()) | ||||||||
1749 | V = evaluateInDifferentElementOrder(I->getOperand(i), Mask); | ||||||||
1750 | else | ||||||||
1751 | V = I->getOperand(i); | ||||||||
1752 | NewOps.push_back(V); | ||||||||
1753 | NeedsRebuild |= (V != I->getOperand(i)); | ||||||||
1754 | } | ||||||||
1755 | if (NeedsRebuild) { | ||||||||
1756 | return buildNew(I, NewOps); | ||||||||
1757 | } | ||||||||
1758 | return I; | ||||||||
1759 | } | ||||||||
1760 | case Instruction::InsertElement: { | ||||||||
1761 | int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); | ||||||||
1762 | |||||||||
1763 | // The insertelement was inserting at Element. Figure out which element | ||||||||
1764 | // that becomes after shuffling. The answer is guaranteed to be unique | ||||||||
1765 | // by canEvaluateShuffled. | ||||||||
1766 | bool Found = false; | ||||||||
1767 | int Index = 0; | ||||||||
1768 | for (int e = Mask.size(); Index != e; ++Index) { | ||||||||
1769 | if (Mask[Index] == Element) { | ||||||||
1770 | Found = true; | ||||||||
1771 | break; | ||||||||
1772 | } | ||||||||
1773 | } | ||||||||
1774 | |||||||||
1775 | // If the element is not in Mask, there is no need to handle operand 1 (the | ||||||||
1776 | // element to be inserted). Just evaluate the values in operand 0 according to Mask. | ||||||||
1777 | if (!Found) | ||||||||
1778 | return evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||||||
1779 | |||||||||
1780 | Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||||||
1781 | return InsertElementInst::Create(V, I->getOperand(1), | ||||||||
1782 | ConstantInt::get(I32Ty, Index), "", I); | ||||||||
1783 | } | ||||||||
1784 | } | ||||||||
1785 | llvm_unreachable("failed to reorder elements of vector instruction!")__builtin_unreachable(); | ||||||||
1786 | } | ||||||||
1787 | |||||||||
1788 | // Returns true if the shuffle is extracting a contiguous range of values from | ||||||||
1789 | // LHS, for example: | ||||||||
1790 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||||||
1791 | // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| | ||||||||
1792 | // Shuffles to: |EE|FF|GG|HH| | ||||||||
1793 | // +--+--+--+--+ | ||||||||
1794 | static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, | ||||||||
1795 | ArrayRef<int> Mask) { | ||||||||
1796 | unsigned LHSElems = | ||||||||
1797 | cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements(); | ||||||||
1798 | unsigned MaskElems = Mask.size(); | ||||||||
1799 | unsigned BegIdx = Mask.front(); | ||||||||
1800 | unsigned EndIdx = Mask.back(); | ||||||||
1801 | if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) | ||||||||
1802 | return false; | ||||||||
1803 | for (unsigned I = 0; I != MaskElems; ++I) | ||||||||
1804 | if (static_cast<unsigned>(Mask[I]) != BegIdx + I) | ||||||||
1805 | return false; | ||||||||
1806 | return true; | ||||||||
1807 | } | ||||||||
1808 | |||||||||
1809 | /// These are the ingredients in an alternate form binary operator as described | ||||||||
1810 | /// below. | ||||||||
1811 | struct BinopElts { | ||||||||
1812 | BinaryOperator::BinaryOps Opcode; | ||||||||
1813 | Value *Op0; | ||||||||
1814 | Value *Op1; | ||||||||
1815 | BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0, | ||||||||
1816 | Value *V0 = nullptr, Value *V1 = nullptr) : | ||||||||
1817 | Opcode(Opc), Op0(V0), Op1(V1) {} | ||||||||
1818 | operator bool() const { return Opcode != 0; } | ||||||||
1819 | }; | ||||||||
1820 | |||||||||
1821 | /// Binops may be transformed into binops with different opcodes and operands. | ||||||||
1822 | /// Reverse the usual canonicalization to enable folds with the non-canonical | ||||||||
1823 | /// form of the binop. If a transform is possible, return the elements of the | ||||||||
1824 | /// new binop. If not, return invalid elements. | ||||||||
1825 | static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { | ||||||||
1826 | Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1); | ||||||||
1827 | Type *Ty = BO->getType(); | ||||||||
1828 | switch (BO->getOpcode()) { | ||||||||
1829 | case Instruction::Shl: { | ||||||||
1830 | // shl X, C --> mul X, (1 << C) | ||||||||
1831 | Constant *C; | ||||||||
1832 | if (match(BO1, m_Constant(C))) { | ||||||||
1833 | Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C); | ||||||||
1834 | return { Instruction::Mul, BO0, ShlOne }; | ||||||||
1835 | } | ||||||||
1836 | break; | ||||||||
1837 | } | ||||||||
1838 | case Instruction::Or: { | ||||||||
1839 | // or X, C --> add X, C (when X and C have no common bits set) | ||||||||
1840 | const APInt *C; | ||||||||
1841 | if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL)) | ||||||||
1842 | return { Instruction::Add, BO0, BO1 }; | ||||||||
1843 | break; | ||||||||
1844 | } | ||||||||
1845 | default: | ||||||||
1846 | break; | ||||||||
1847 | } | ||||||||
1848 | return {}; | ||||||||
1849 | } | ||||||||
1850 | |||||||||
1851 | static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { | ||||||||
1852 | assert(Shuf.isSelect() && "Must have select-equivalent shuffle")((void)0); | ||||||||
1853 | |||||||||
1854 | // Are we shuffling together some value and that same value after it has been | ||||||||
1855 | // modified by a binop with a constant? | ||||||||
1856 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
1857 | Constant *C; | ||||||||
1858 | bool Op0IsBinop; | ||||||||
1859 | if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C)))) | ||||||||
1860 | Op0IsBinop = true; | ||||||||
1861 | else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C)))) | ||||||||
1862 | Op0IsBinop = false; | ||||||||
1863 | else | ||||||||
1864 | return nullptr; | ||||||||
1865 | |||||||||
1866 | // The identity constant for a binop leaves a variable operand unchanged. For | ||||||||
1867 | // a vector, this is a splat of something like 0, -1, or 1. | ||||||||
1868 | // If there's no identity constant for this binop, we're done. | ||||||||
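// For example (not an exhaustive list), the identity is 0 for add/or/xor,
// 1 for mul, and -1 (all-ones) for and.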
1869 | auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); | ||||||||
1870 | BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); | ||||||||
1871 | Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); | ||||||||
1872 | if (!IdC) | ||||||||
1873 | return nullptr; | ||||||||
1874 | |||||||||
1875 | // Shuffle identity constants into the lanes that return the original value. | ||||||||
1876 | // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4} | ||||||||
1877 | // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4} | ||||||||
1878 | // The existing binop constant vector remains in the same operand position. | ||||||||
1879 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
1880 | Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : | ||||||||
1881 | ConstantExpr::getShuffleVector(IdC, C, Mask); | ||||||||
1882 | |||||||||
1883 | bool MightCreatePoisonOrUB = | ||||||||
1884 | is_contained(Mask, UndefMaskElem) && | ||||||||
1885 | (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); | ||||||||
1886 | if (MightCreatePoisonOrUB) | ||||||||
1887 | NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true); | ||||||||
1888 | |||||||||
1889 | // shuf (bop X, C), X, M --> bop X, C' | ||||||||
1890 | // shuf X, (bop X, C), M --> bop X, C' | ||||||||
1891 | Value *X = Op0IsBinop ? Op1 : Op0; | ||||||||
1892 | Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); | ||||||||
1893 | NewBO->copyIRFlags(BO); | ||||||||
1894 | |||||||||
1895 | // An undef shuffle mask element may propagate as an undef constant element in | ||||||||
1896 | // the new binop. That would produce poison where the original code might not. | ||||||||
1897 | // If we already made a safe constant, then there's no danger. | ||||||||
1898 | if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB) | ||||||||
1899 | NewBO->dropPoisonGeneratingFlags(); | ||||||||
1900 | return NewBO; | ||||||||
1901 | } | ||||||||
1902 | |||||||||
1903 | /// If we have an insert of a scalar to a non-zero element of an undefined | ||||||||
1904 | /// vector and then shuffle that value, that's the same as inserting to the zero | ||||||||
1905 | /// element and shuffling. Splatting from the zero element is recognized as the | ||||||||
1906 | /// canonical form of splat. | ||||||||
1907 | static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, | ||||||||
1908 | InstCombiner::BuilderTy &Builder) { | ||||||||
1909 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
1910 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
1911 | Value *X; | ||||||||
1912 | uint64_t IndexC; | ||||||||
1913 | |||||||||
1914 | // Match a shuffle that is a splat to a non-zero element. | ||||||||
1915 | if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X), | ||||||||
1916 | m_ConstantInt(IndexC)))) || | ||||||||
1917 | !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0) | ||||||||
1918 | return nullptr; | ||||||||
1919 | |||||||||
1920 | // Insert into element 0 of an undef vector. | ||||||||
1921 | UndefValue *UndefVec = UndefValue::get(Shuf.getType()); | ||||||||
1922 | Constant *Zero = Builder.getInt32(0); | ||||||||
1923 | Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero); | ||||||||
1924 | |||||||||
1925 | // Splat from element 0. Any mask element that is undefined remains undefined. | ||||||||
1926 | // For example: | ||||||||
1927 | // shuf (inselt undef, X, 2), undef, <2,2,undef> | ||||||||
1928 | // --> shuf (inselt undef, X, 0), undef, <0,0,undef> | ||||||||
1929 | unsigned NumMaskElts = | ||||||||
1930 | cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
1931 | SmallVector<int, 16> NewMask(NumMaskElts, 0); | ||||||||
1932 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||||||
1933 | if (Mask[i] == UndefMaskElem) | ||||||||
1934 | NewMask[i] = Mask[i]; | ||||||||
1935 | |||||||||
1936 | return new ShuffleVectorInst(NewIns, UndefVec, NewMask); | ||||||||
1937 | } | ||||||||
1938 | |||||||||
1939 | /// Try to fold shuffles that are the equivalent of a vector select. | ||||||||
1940 | static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, | ||||||||
1941 | InstCombiner::BuilderTy &Builder, | ||||||||
1942 | const DataLayout &DL) { | ||||||||
1943 | if (!Shuf.isSelect()) | ||||||||
1944 | return nullptr; | ||||||||
1945 | |||||||||
1946 | // Canonicalize to choose from operand 0 first unless operand 1 is undefined. | ||||||||
1947 | // Commuting undef to operand 0 conflicts with another canonicalization. | ||||||||
1948 | unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
1949 | if (!match(Shuf.getOperand(1), m_Undef()) && | ||||||||
1950 | Shuf.getMaskValue(0) >= (int)NumElts) { | ||||||||
1951 | // TODO: Can we assert that both operands of a shuffle-select are not undef | ||||||||
1952 | // (otherwise, it would have been folded by instsimplify)? | ||||||||
1953 | Shuf.commute(); | ||||||||
1954 | return &Shuf; | ||||||||
1955 | } | ||||||||
1956 | |||||||||
1957 | if (Instruction *I = foldSelectShuffleWith1Binop(Shuf)) | ||||||||
1958 | return I; | ||||||||
1959 | |||||||||
1960 | BinaryOperator *B0, *B1; | ||||||||
1961 | if (!match(Shuf.getOperand(0), m_BinOp(B0)) || | ||||||||
1962 | !match(Shuf.getOperand(1), m_BinOp(B1))) | ||||||||
1963 | return nullptr; | ||||||||
1964 | |||||||||
1965 | Value *X, *Y; | ||||||||
1966 | Constant *C0, *C1; | ||||||||
1967 | bool ConstantsAreOp1; | ||||||||
1968 | if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) && | ||||||||
1969 | match(B1, m_BinOp(m_Value(Y), m_Constant(C1)))) | ||||||||
1970 | ConstantsAreOp1 = true; | ||||||||
1971 | else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && | ||||||||
1972 | match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) | ||||||||
1973 | ConstantsAreOp1 = false; | ||||||||
1974 | else | ||||||||
1975 | return nullptr; | ||||||||
1976 | |||||||||
1977 | // We need matching binops to fold the lanes together. | ||||||||
1978 | BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); | ||||||||
1979 | BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); | ||||||||
1980 | bool DropNSW = false; | ||||||||
1981 | if (ConstantsAreOp1 && Opc0 != Opc1) { | ||||||||
1982 | // TODO: We drop "nsw" if shift is converted into multiply because it may | ||||||||
1983 | // not be correct when the shift amount is BitWidth - 1. We could examine | ||||||||
1984 | // each vector element to determine if it is safe to keep that flag. | ||||||||
1985 | if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl) | ||||||||
1986 | DropNSW = true; | ||||||||
1987 | if (BinopElts AltB0 = getAlternateBinop(B0, DL)) { | ||||||||
1988 | assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop")((void)0); | ||||||||
1989 | Opc0 = AltB0.Opcode; | ||||||||
1990 | C0 = cast<Constant>(AltB0.Op1); | ||||||||
1991 | } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) { | ||||||||
1992 | assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop")((void)0); | ||||||||
1993 | Opc1 = AltB1.Opcode; | ||||||||
1994 | C1 = cast<Constant>(AltB1.Op1); | ||||||||
1995 | } | ||||||||
1996 | } | ||||||||
1997 | |||||||||
1998 | if (Opc0 != Opc1) | ||||||||
1999 | return nullptr; | ||||||||
2000 | |||||||||
2001 | // The opcodes must be the same. Use a new name to make that clear. | ||||||||
2002 | BinaryOperator::BinaryOps BOpc = Opc0; | ||||||||
2003 | |||||||||
2004 | // Select the constant elements needed for the single binop. | ||||||||
2005 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2006 | Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); | ||||||||
2007 | |||||||||
2008 | // We are moving a binop after a shuffle. When a shuffle has an undefined | ||||||||
2009 | // mask element, the result is undefined, but it is not poison or undefined | ||||||||
2010 | // behavior. That is not necessarily true for div/rem/shift. | ||||||||
2011 | bool MightCreatePoisonOrUB = | ||||||||
2012 | is_contained(Mask, UndefMaskElem) && | ||||||||
2013 | (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); | ||||||||
2014 | if (MightCreatePoisonOrUB) | ||||||||
2015 | NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, | ||||||||
2016 | ConstantsAreOp1); | ||||||||
2017 | |||||||||
2018 | Value *V; | ||||||||
2019 | if (X == Y) { | ||||||||
2020 | // Remove a binop and the shuffle by rearranging the constant: | ||||||||
2021 | // shuffle (op V, C0), (op V, C1), M --> op V, C' | ||||||||
2022 | // shuffle (op C0, V), (op C1, V), M --> op C', V | ||||||||
2023 | V = X; | ||||||||
2024 | } else { | ||||||||
2025 | // If there are 2 different variable operands, we must create a new shuffle | ||||||||
2026 | // (select) first, so check uses to ensure that we don't end up with more | ||||||||
2027 | // instructions than we started with. | ||||||||
2028 | if (!B0->hasOneUse() && !B1->hasOneUse()) | ||||||||
2029 | return nullptr; | ||||||||
2030 | |||||||||
2031 | // If we use the original shuffle mask and op1 is *variable*, we would be | ||||||||
2032 | // putting an undef into operand 1 of div/rem/shift. This is either UB or | ||||||||
2033 | // poison. We do not have to guard against UB when *constants* are op1 | ||||||||
2034 | // because safe constants guarantee that we do not overflow sdiv/srem (and | ||||||||
2035 | // there's no danger for other opcodes). | ||||||||
2036 | // TODO: To allow this case, create a new shuffle mask with no undefs. | ||||||||
2037 | if (MightCreatePoisonOrUB && !ConstantsAreOp1) | ||||||||
2038 | return nullptr; | ||||||||
2039 | |||||||||
2040 | // Note: In general, we do not create new shuffles in InstCombine because we | ||||||||
2041 | // do not know if a target can lower an arbitrary shuffle optimally. In this | ||||||||
2042 | // case, the shuffle uses the existing mask, so there is no additional risk. | ||||||||
2043 | |||||||||
2044 | // Select the variable vectors first, then perform the binop: | ||||||||
2045 | // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' | ||||||||
2046 | // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) | ||||||||
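// As a concrete (invented) instance of the first form:
//   shuffle (add %x, <1,2,3,4>), (add %y, <5,6,7,8>), <0,5,2,7>
//     --> add (shuffle %x, %y, <0,5,2,7>), <1,6,3,8>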
2047 | V = Builder.CreateShuffleVector(X, Y, Mask); | ||||||||
2048 | } | ||||||||
2049 | |||||||||
2050 | Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) : | ||||||||
2051 | BinaryOperator::Create(BOpc, NewC, V); | ||||||||
2052 | |||||||||
2053 | // Flags are intersected from the 2 source binops. But there are 2 exceptions: | ||||||||
2054 | // 1. If we changed an opcode, poison conditions might have changed. | ||||||||
2055 | // 2. If the shuffle had undef mask elements, the new binop might have undefs | ||||||||
2056 | // where the original code did not. But if we already made a safe constant, | ||||||||
2057 | // then there's no danger. | ||||||||
2058 | NewBO->copyIRFlags(B0); | ||||||||
2059 | NewBO->andIRFlags(B1); | ||||||||
2060 | if (DropNSW) | ||||||||
2061 | NewBO->setHasNoSignedWrap(false); | ||||||||
2062 | if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB) | ||||||||
2063 | NewBO->dropPoisonGeneratingFlags(); | ||||||||
2064 | return NewBO; | ||||||||
2065 | } | ||||||||
2066 | |||||||||
2067 | /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate. | ||||||||
2068 | /// Example (little endian): | ||||||||
2069 | /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8> | ||||||||
2070 | static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, | ||||||||
2071 | bool IsBigEndian) { | ||||||||
2072 | // This must be a bitcasted shuffle of 1 vector integer operand. | ||||||||
2073 | Type *DestType = Shuf.getType(); | ||||||||
2074 | Value *X; | ||||||||
2075 | if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) || | ||||||||
2076 | !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy()) | ||||||||
2077 | return nullptr; | ||||||||
2078 | |||||||||
2079 | // The source type must have the same number of elements as the shuffle, | ||||||||
2080 | // and the source element type must be larger than the shuffle element type. | ||||||||
2081 | Type *SrcType = X->getType(); | ||||||||
2082 | if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() || | ||||||||
2083 | cast<FixedVectorType>(SrcType)->getNumElements() != | ||||||||
2084 | cast<FixedVectorType>(DestType)->getNumElements() || | ||||||||
2085 | SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0) | ||||||||
2086 | return nullptr; | ||||||||
2087 | |||||||||
2088 | assert(Shuf.changesLength() && !Shuf.increasesLength() && | ||||||||
2089 | "Expected a shuffle that decreases length"); | ||||||||
2090 | |||||||||
2091 | // Last, check that the mask chooses the correct low bits for each narrow | ||||||||
2092 | // element in the result. | ||||||||
2093 | uint64_t TruncRatio = | ||||||||
2094 | SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits(); | ||||||||
2095 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2096 | for (unsigned i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
2097 | if (Mask[i] == UndefMaskElem) | ||||||||
2098 | continue; | ||||||||
2099 | uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio; | ||||||||
2100 | assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits"); | ||||||||
2101 | if (Mask[i] != (int)LSBIndex) | ||||||||
2102 | return nullptr; | ||||||||
2103 | } | ||||||||
2104 | |||||||||
2105 | return new TruncInst(X, DestType); | ||||||||
2106 | } | ||||||||
2107 | |||||||||
2108 | /// Match a shuffle-select-shuffle pattern where the shuffles are widening and | ||||||||
2109 | /// narrowing (concatenating with undef and extracting back to the original | ||||||||
2110 | /// length). This allows replacing the wide select with a narrow select. | ||||||||
2111 | static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, | ||||||||
2112 | InstCombiner::BuilderTy &Builder) { | ||||||||
2113 | // This must be a narrowing identity shuffle. It extracts the 1st N elements | ||||||||
2114 | // of the 1st vector operand of a shuffle. | ||||||||
2115 | if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract()) | ||||||||
2116 | return nullptr; | ||||||||
2117 | |||||||||
2118 | // The vector being shuffled must be a vector select that we can eliminate. | ||||||||
2119 | // TODO: The one-use requirement could be eased if X and/or Y are constants. | ||||||||
2120 | Value *Cond, *X, *Y; | ||||||||
2121 | if (!match(Shuf.getOperand(0), | ||||||||
2122 | m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) | ||||||||
2123 | return nullptr; | ||||||||
2124 | |||||||||
2125 | // We need a narrow condition value. It must be extended with undef elements | ||||||||
2126 | // and have the same number of elements as this shuffle. | ||||||||
2127 | unsigned NarrowNumElts = | ||||||||
2128 | cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
2129 | Value *NarrowCond; | ||||||||
2130 | if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) || | ||||||||
2131 | cast<FixedVectorType>(NarrowCond->getType())->getNumElements() != | ||||||||
2132 | NarrowNumElts || | ||||||||
2133 | !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()) | ||||||||
2134 | return nullptr; | ||||||||
2135 | |||||||||
2136 | // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) --> | ||||||||
2137 | // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask) | ||||||||
2138 | Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); | ||||||||
2139 | Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask()); | ||||||||
2140 | return SelectInst::Create(NarrowCond, NarrowX, NarrowY); | ||||||||
2141 | } | ||||||||
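     | // Sketch of the pattern above on hypothetical IR (illustration only, not
     | // taken from the LLVM sources or tests):
     | //   %wc  = shufflevector <2 x i1> %cond, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %sel = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y
     | //   %r   = shufflevector <4 x i32> %sel, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | // -->
     | //   %nx = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | //   %ny = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | //   %r  = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny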
2142 | |||||||||
2143 | /// Try to fold an extract subvector operation. | ||||||||
2144 | static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { | ||||||||
2145 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
2146 | if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef())) | ||||||||
2147 | return nullptr; | ||||||||
2148 | |||||||||
2149 | // Check if we are extracting all bits of an inserted scalar: | ||||||||
2150 | // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type | ||||||||
2151 | Value *X; | ||||||||
2152 | if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) && | ||||||||
2153 | X->getType()->getPrimitiveSizeInBits() == | ||||||||
2154 | Shuf.getType()->getPrimitiveSizeInBits()) | ||||||||
2155 | return new BitCastInst(X, Shuf.getType()); | ||||||||
2156 | |||||||||
2157 | // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. | ||||||||
2158 | Value *Y; | ||||||||
2159 | ArrayRef<int> Mask; | ||||||||
2160 | if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) | ||||||||
2161 | return nullptr; | ||||||||
2162 | |||||||||
2163 | // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, | ||||||||
2164 | // then combining may result in worse codegen. | ||||||||
2165 | if (!Op0->hasOneUse()) | ||||||||
2166 | return nullptr; | ||||||||
2167 | |||||||||
2168 | // We are extracting a subvector from a shuffle. Remove excess elements from | ||||||||
2169 | // the 1st shuffle mask to eliminate the extract. | ||||||||
2170 | // | ||||||||
2171 | // This transform is conservatively limited to identity extracts because we do | ||||||||
2172 | // not allow arbitrary shuffle mask creation as a target-independent transform | ||||||||
2173 | // (because we can't guarantee that it will lower efficiently). | ||||||||
2174 | // | ||||||||
2175 | // If the extracting shuffle has an undef mask element, it transfers to the | ||||||||
2176 | // new shuffle mask. Otherwise, copy the original mask element. Example: | ||||||||
2177 | // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> --> | ||||||||
2178 | // shuf X, Y, <C0, undef, C2, undef> | ||||||||
2179 | unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
2180 | SmallVector<int, 16> NewMask(NumElts); | ||||||||
2181 | assert(NumElts < Mask.size() && | ||||||||
2182 | "Identity with extract must have fewer elements than its inputs"); | ||||||||
2183 | |||||||||
2184 | for (unsigned i = 0; i != NumElts; ++i) { | ||||||||
2185 | int ExtractMaskElt = Shuf.getMaskValue(i); | ||||||||
2186 | int MaskElt = Mask[i]; | ||||||||
2187 | NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt; | ||||||||
2188 | } | ||||||||
2189 | return new ShuffleVectorInst(X, Y, NewMask); | ||||||||
2190 | } | ||||||||
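     | // Sketch (illustration only, not from the LLVM sources or tests): an identity
     | // extract of a one-use two-input shuffle reuses the original inputs with a
     | // shortened mask.
     | //   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
     | //   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | // -->
     | //   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 4>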
2191 | |||||||||
2192 | /// Try to replace a shuffle with an insertelement or try to replace a shuffle | ||||||||
2193 | /// operand with the operand of an insertelement. | ||||||||
2194 | static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, | ||||||||
2195 | InstCombinerImpl &IC) { | ||||||||
2196 | Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); | ||||||||
2197 | SmallVector<int, 16> Mask; | ||||||||
2198 | Shuf.getShuffleMask(Mask); | ||||||||
2199 | |||||||||
2200 | // The shuffle must not change vector sizes. | ||||||||
2201 | // TODO: This restriction could be removed if the insert has only one use | ||||||||
2202 | // (because the transform would require a new length-changing shuffle). | ||||||||
2203 | int NumElts = Mask.size(); | ||||||||
2204 | if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements())) | ||||||||
2205 | return nullptr; | ||||||||
2206 | |||||||||
2207 | // This is a specialization of a fold in SimplifyDemandedVectorElts. We may | ||||||||
2208 | // not be able to handle it there if the insertelement has >1 use. | ||||||||
2209 | // If the shuffle has an insertelement operand but does not choose the | ||||||||
2210 | // inserted scalar element from that value, then we can replace that shuffle | ||||||||
2211 | // operand with the source vector of the insertelement. | ||||||||
2212 | Value *X; | ||||||||
2213 | uint64_t IdxC; | ||||||||
2214 | if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||||||
2215 | // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask | ||||||||
2216 | if (!is_contained(Mask, (int)IdxC)) | ||||||||
2217 | return IC.replaceOperand(Shuf, 0, X); | ||||||||
2218 | } | ||||||||
2219 | if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||||||
2220 | // Offset the index constant by the vector width because we are checking for | ||||||||
2221 | // accesses to the 2nd vector input of the shuffle. | ||||||||
2222 | IdxC += NumElts; | ||||||||
2223 | // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask | ||||||||
2224 | if (!is_contained(Mask, (int)IdxC)) | ||||||||
2225 | return IC.replaceOperand(Shuf, 1, X); | ||||||||
2226 | } | ||||||||
2227 | |||||||||
2228 | // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' | ||||||||
2229 | auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { | ||||||||
2230 | // We need an insertelement with a constant index. | ||||||||
2231 | if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar), | ||||||||
2232 | m_ConstantInt(IndexC)))) | ||||||||
2233 | return false; | ||||||||
2234 | |||||||||
2235 | // Test the shuffle mask to see if it splices the inserted scalar into the | ||||||||
2236 | // operand 1 vector of the shuffle. | ||||||||
2237 | int NewInsIndex = -1; | ||||||||
2238 | for (int i = 0; i != NumElts; ++i) { | ||||||||
2239 | // Ignore undef mask elements. | ||||||||
2240 | if (Mask[i] == -1) | ||||||||
2241 | continue; | ||||||||
2242 | |||||||||
2243 | // The shuffle takes elements of operand 1 without lane changes. | ||||||||
2244 | if (Mask[i] == NumElts + i) | ||||||||
2245 | continue; | ||||||||
2246 | |||||||||
2247 | // The shuffle must choose the inserted scalar exactly once. | ||||||||
2248 | if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) | ||||||||
2249 | return false; | ||||||||
2250 | |||||||||
2251 | // The shuffle is placing the inserted scalar into element i. | ||||||||
2252 | NewInsIndex = i; | ||||||||
2253 | } | ||||||||
2254 | |||||||||
2255 | assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); | ||||||||
2256 | |||||||||
2257 | // Index is updated to the potentially translated insertion lane. | ||||||||
2258 | IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); | ||||||||
2259 | return true; | ||||||||
2260 | }; | ||||||||
2261 | |||||||||
2262 | // If the shuffle is unnecessary, insert the scalar operand directly into | ||||||||
2263 | // operand 1 of the shuffle. Example: | ||||||||
2264 | // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 | ||||||||
2265 | Value *Scalar; | ||||||||
2266 | ConstantInt *IndexC; | ||||||||
2267 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||||||
2268 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||||||
2269 | |||||||||
2270 | // Try again after commuting shuffle. Example: | ||||||||
2271 | // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> | ||||||||
2272 | // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 | ||||||||
2273 | std::swap(V0, V1); | ||||||||
2274 | ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); | ||||||||
2275 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||||||
2276 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||||||
2277 | |||||||||
2278 | return nullptr; | ||||||||
2279 | } | ||||||||
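     | // Sketch of the first case handled above (hypothetical IR, illustration only):
     | // the inserted lane is never read by the shuffle, so the insertelement can be
     | // bypassed.
     | //   %v0 = insertelement <4 x i32> %x, i32 %s, i64 3
     | //   %r  = shufflevector <4 x i32> %v0, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
     | // -->
     | //   %r  = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 5, i32 6>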
2280 | |||||||||
2281 | static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { | ||||||||
2282 | // Match the operands as identity with padding (also known as concatenation | ||||||||
2283 | // with undef) shuffles of the same source type. The backend is expected to | ||||||||
2284 | // recreate these concatenations from a shuffle of narrow operands. | ||||||||
2285 | auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0)); | ||||||||
2286 | auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1)); | ||||||||
2287 | if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || | ||||||||
2288 | !Shuffle1 || !Shuffle1->isIdentityWithPadding()) | ||||||||
2289 | return nullptr; | ||||||||
2290 | |||||||||
2291 | // We limit this transform to power-of-2 types because we expect that the | ||||||||
2292 | // backend can convert the simplified IR patterns to identical nodes as the | ||||||||
2293 | // original IR. | ||||||||
2294 | // TODO: If we can verify the same behavior for arbitrary types, the | ||||||||
2295 | // power-of-2 checks can be removed. | ||||||||
2296 | Value *X = Shuffle0->getOperand(0); | ||||||||
2297 | Value *Y = Shuffle1->getOperand(0); | ||||||||
2298 | if (X->getType() != Y->getType() || | ||||||||
2299 | !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) || | ||||||||
2300 | !isPowerOf2_32( | ||||||||
2301 | cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || | ||||||||
2302 | !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || | ||||||||
2303 | match(X, m_Undef()) || match(Y, m_Undef())) | ||||||||
2304 | return nullptr; | ||||||||
2305 | assert(match(Shuffle0->getOperand(1), m_Undef()) && | ||||||||
2306 | match(Shuffle1->getOperand(1), m_Undef()) && | ||||||||
2307 | "Unexpected operand for identity shuffle"); | ||||||||
2308 | |||||||||
2309 | // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source | ||||||||
2310 | // operands directly by adjusting the shuffle mask to account for the narrower | ||||||||
2311 | // types: | ||||||||
2312 | // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' | ||||||||
2313 | int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements(); | ||||||||
2314 | int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements(); | ||||||||
2315 | assert(WideElts > NarrowElts && "Unexpected types for identity with padding"); | ||||||||
2316 | |||||||||
2317 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2318 | SmallVector<int, 16> NewMask(Mask.size(), -1); | ||||||||
2319 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
2320 | if (Mask[i] == -1) | ||||||||
2321 | continue; | ||||||||
2322 | |||||||||
2323 | // If this shuffle is choosing an undef element from 1 of the sources, that | ||||||||
2324 | // element is undef. | ||||||||
2325 | if (Mask[i] < WideElts) { | ||||||||
2326 | if (Shuffle0->getMaskValue(Mask[i]) == -1) | ||||||||
2327 | continue; | ||||||||
2328 | } else { | ||||||||
2329 | if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1) | ||||||||
2330 | continue; | ||||||||
2331 | } | ||||||||
2332 | |||||||||
2333 | // If this shuffle is choosing from the 1st narrow op, the mask element is | ||||||||
2334 | // the same. If this shuffle is choosing from the 2nd narrow op, the mask | ||||||||
2335 | // element is offset down to adjust for the narrow vector widths. | ||||||||
2336 | if (Mask[i] < WideElts) { | ||||||||
2337 | assert(Mask[i] < NarrowElts && "Unexpected shuffle mask"); | ||||||||
2338 | NewMask[i] = Mask[i]; | ||||||||
2339 | } else { | ||||||||
2340 | assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask"); | ||||||||
2341 | NewMask[i] = Mask[i] - (WideElts - NarrowElts); | ||||||||
2342 | } | ||||||||
2343 | } | ||||||||
2344 | return new ShuffleVectorInst(X, Y, NewMask); | ||||||||
2345 | } | ||||||||
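     | // Sketch (illustration only, not from the LLVM sources or tests): a shuffle of
     | // two identity-with-padding shuffles of <2 x i32> sources collapses to one
     | // shuffle of the narrow sources.
     | //   %w0 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %w1 = shufflevector <2 x i32> %y, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %r  = shufflevector <4 x i32> %w0, <4 x i32> %w1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     | // -->
     | //   %r  = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>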
2346 | |||||||||
2347 | Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { | ||||||||
2348 | Value *LHS = SVI.getOperand(0); | ||||||||
2349 | Value *RHS = SVI.getOperand(1); | ||||||||
2350 | SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); | ||||||||
2351 | if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), | ||||||||
2352 | SVI.getType(), ShufQuery)) | ||||||||
2353 | return replaceInstUsesWith(SVI, V); | ||||||||
2354 | |||||||||
2355 | // Bail out for scalable vectors | ||||||||
2356 | if (isa<ScalableVectorType>(LHS->getType())) | ||||||||
2357 | return nullptr; | ||||||||
2358 | |||||||||
2359 | unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); | ||||||||
2360 | unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); | ||||||||
2361 | |||||||||
2362 | // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) | ||||||||
2363 | // | ||||||||
2364 | // if X and Y are of the same (vector) type, and the element size is not | ||||||||
2365 | // changed by the bitcasts, we can distribute the bitcasts through the | ||||||||
2366 | // shuffle, hopefully reducing the number of instructions. We make sure that | ||||||||
2367 | // at least one bitcast only has one use, so we don't *increase* the number of | ||||||||
2368 | // instructions here. | ||||||||
2369 | Value *X, *Y; | ||||||||
2370 | if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && | ||||||||
2371 | X->getType()->isVectorTy() && X->getType() == Y->getType() && | ||||||||
2372 | X->getType()->getScalarSizeInBits() == | ||||||||
2373 | SVI.getType()->getScalarSizeInBits() && | ||||||||
2374 | (LHS->hasOneUse() || RHS->hasOneUse())) { | ||||||||
2375 | Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(), | ||||||||
2376 | SVI.getName() + ".uncasted"); | ||||||||
2377 | return new BitCastInst(V, SVI.getType()); | ||||||||
2378 | } | ||||||||
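     | // Sketch (hypothetical IR, illustration only): same-sized-element bitcasts are
     | // hoisted past the shuffle so only one bitcast remains.
     | //   %bx = bitcast <4 x i32> %x to <4 x float>
     | //   %by = bitcast <4 x i32> %y to <4 x float>
     | //   %r  = shufflevector <4 x float> %bx, <4 x float> %by, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     | // -->
     | //   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     | //   %r = bitcast <4 x i32> %s to <4 x float>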
2379 | |||||||||
2380 | ArrayRef<int> Mask = SVI.getShuffleMask(); | ||||||||
2381 | Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); | ||||||||
2382 | |||||||||
2383 | // Peek through a bitcasted shuffle operand by scaling the mask. If the | ||||||||
2384 | // simulated shuffle can simplify, then this shuffle is unnecessary: | ||||||||
2385 | // shuf (bitcast X), undef, Mask --> bitcast X' | ||||||||
2386 | // TODO: This could be extended to allow length-changing shuffles. | ||||||||
2387 | // The transform might also be obsoleted if we allowed canonicalization | ||||||||
2388 | // of bitcasted shuffles. | ||||||||
2389 | if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && | ||||||||
2390 | X->getType()->isVectorTy() && VWidth == LHSWidth) { | ||||||||
2391 | // Try to create a scaled mask constant. | ||||||||
2392 | auto *XType = cast<FixedVectorType>(X->getType()); | ||||||||
2393 | unsigned XNumElts = XType->getNumElements(); | ||||||||
2394 | SmallVector<int, 16> ScaledMask; | ||||||||
2395 | if (XNumElts >= VWidth) { | ||||||||
2396 | assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast"); | ||||||||
2397 | narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask); | ||||||||
2398 | } else { | ||||||||
2399 | assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast"); | ||||||||
2400 | if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask)) | ||||||||
2401 | ScaledMask.clear(); | ||||||||
2402 | } | ||||||||
2403 | if (!ScaledMask.empty()) { | ||||||||
2404 | // If the shuffled source vector simplifies, cast that value to this | ||||||||
2405 | // shuffle's type. | ||||||||
2406 | if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType), | ||||||||
2407 | ScaledMask, XType, ShufQuery)) | ||||||||
2408 | return BitCastInst::Create(Instruction::BitCast, V, SVI.getType()); | ||||||||
2409 | } | ||||||||
2410 | } | ||||||||
2411 | |||||||||
2412 | // shuffle x, x, mask --> shuffle x, undef, mask' | ||||||||
2413 | if (LHS == RHS) { | ||||||||
2414 | assert(!match(RHS, m_Undef()) && | ||||||||
2415 | "Shuffle with 2 undef ops not simplified?"); | ||||||||
2416 | // Remap any references to RHS to use LHS. | ||||||||
2417 | SmallVector<int, 16> Elts; | ||||||||
2418 | for (unsigned i = 0; i != VWidth; ++i) { | ||||||||
2419 | // Propagate undef elements or force mask to LHS. | ||||||||
2420 | if (Mask[i] < 0) | ||||||||
2421 | Elts.push_back(UndefMaskElem); | ||||||||
2422 | else | ||||||||
2423 | Elts.push_back(Mask[i] % LHSWidth); | ||||||||
2424 | } | ||||||||
2425 | return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts); | ||||||||
2426 | } | ||||||||
2427 | |||||||||
2428 | // shuffle undef, x, mask --> shuffle x, undef, mask' | ||||||||
2429 | if (match(LHS, m_Undef())) { | ||||||||
2430 | SVI.commute(); | ||||||||
2431 | return &SVI; | ||||||||
2432 | } | ||||||||
2433 | |||||||||
2434 | if (Instruction *I = canonicalizeInsertSplat(SVI, Builder)) | ||||||||
2435 | return I; | ||||||||
2436 | |||||||||
2437 | if (Instruction *I = foldSelectShuffle(SVI, Builder, DL)) | ||||||||
2438 | return I; | ||||||||
2439 | |||||||||
2440 | if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian())) | ||||||||
2441 | return I; | ||||||||
2442 | |||||||||
2443 | if (Instruction *I = narrowVectorSelect(SVI, Builder)) | ||||||||
2444 | return I; | ||||||||
2445 | |||||||||
2446 | APInt UndefElts(VWidth, 0); | ||||||||
2447 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||||||
2448 | if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { | ||||||||
2449 | if (V != &SVI) | ||||||||
2450 | return replaceInstUsesWith(SVI, V); | ||||||||
2451 | return &SVI; | ||||||||
2452 | } | ||||||||
2453 | |||||||||
2454 | if (Instruction *I = foldIdentityExtractShuffle(SVI)) | ||||||||
2455 | return I; | ||||||||
2456 | |||||||||
2457 | // These transforms have the potential to lose undef knowledge, so they are | ||||||||
2458 | // intentionally placed after SimplifyDemandedVectorElts(). | ||||||||
2459 | if (Instruction *I = foldShuffleWithInsert(SVI, *this)) | ||||||||
2460 | return I; | ||||||||
2461 | if (Instruction *I = foldIdentityPaddedShuffles(SVI)) | ||||||||
2462 | return I; | ||||||||
2463 | |||||||||
2464 | if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) { | ||||||||
2465 | Value *V = evaluateInDifferentElementOrder(LHS, Mask); | ||||||||
2466 | return replaceInstUsesWith(SVI, V); | ||||||||
2467 | } | ||||||||
2468 | |||||||||
2469 | // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to | ||||||||
2470 | // a non-vector type. We can instead bitcast the original vector followed by | ||||||||
2471 | // an extract of the desired element: | ||||||||
2472 | // | ||||||||
2473 | // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, | ||||||||
2474 | // <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||||||||
2475 | // %1 = bitcast <4 x i8> %sroa to i32 | ||||||||
2476 | // Becomes: | ||||||||
2477 | // %bc = bitcast <16 x i8> %in to <4 x i32> | ||||||||
2478 | // %ext = extractelement <4 x i32> %bc, i32 0 | ||||||||
2479 | // | ||||||||
2480 | // If the shuffle is extracting a contiguous range of values from the input | ||||||||
2481 | // vector then each use which is a bitcast of the extracted size can be | ||||||||
2482 | // replaced. This will work if the vector types are compatible, and the begin | ||||||||
2483 | // index is aligned to a value in the casted vector type. If the begin index | ||||||||
2484 | // isn't aligned then we can shuffle the original vector (keeping the same | ||||||||
2485 | // vector type) before extracting. | ||||||||
2486 | // | ||||||||
2487 | // This code will bail out if the target type is fundamentally incompatible | ||||||||
2488 | // with vectors of the source type. | ||||||||
2489 | // | ||||||||
2490 | // Example of <16 x i8>, target type i32: | ||||||||
2491 | // Index range [4,8): v-----------v Will work. | ||||||||
2492 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||||||
2493 | // <16 x i8>: | | | | | | | | | | | | | | | | | | ||||||||
2494 | // <4 x i32>: | | | | | | ||||||||
2495 | // +-----------+-----------+-----------+-----------+ | ||||||||
2496 | // Index range [6,10): ^-----------^ Needs an extra shuffle. | ||||||||
2497 | // Target type i40: ^--------------^ Won't work, bail. | ||||||||
2498 | bool MadeChange = false; | ||||||||
2499 | if (isShuffleExtractingFromLHS(SVI, Mask)) { | ||||||||
2500 | Value *V = LHS; | ||||||||
2501 | unsigned MaskElems = Mask.size(); | ||||||||
2502 | auto *SrcTy = cast<FixedVectorType>(V->getType()); | ||||||||
2503 | unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize(); | ||||||||
2504 | unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); | ||||||||
2505 | assert(SrcElemBitWidth && "vector elements must have a bitwidth"); | ||||||||
2506 | unsigned SrcNumElems = SrcTy->getNumElements(); | ||||||||
2507 | SmallVector<BitCastInst *, 8> BCs; | ||||||||
2508 | DenseMap<Type *, Value *> NewBCs; | ||||||||
2509 | for (User *U : SVI.users()) | ||||||||
2510 | if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) | ||||||||
2511 | if (!BC->use_empty()) | ||||||||
2512 | // Only visit bitcasts that weren't previously handled. | ||||||||
2513 | BCs.push_back(BC); | ||||||||
2514 | for (BitCastInst *BC : BCs) { | ||||||||
2515 | unsigned BegIdx = Mask.front(); | ||||||||
2516 | Type *TgtTy = BC->getDestTy(); | ||||||||
2517 | unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); | ||||||||
2518 | if (!TgtElemBitWidth) | ||||||||
2519 | continue; | ||||||||
2520 | unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; | ||||||||
2521 | bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; | ||||||||
2522 | bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); | ||||||||
2523 | if (!VecBitWidthsEqual) | ||||||||
2524 | continue; | ||||||||
2525 | if (!VectorType::isValidElementType(TgtTy)) | ||||||||
2526 | continue; | ||||||||
2527 | auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems); | ||||||||
2528 | if (!BegIsAligned) { | ||||||||
2529 | // Shuffle the input so [0,NumElements) contains the output, and | ||||||||
2530 | // [NumElems,SrcNumElems) is undef. | ||||||||
2531 | SmallVector<int, 16> ShuffleMask(SrcNumElems, -1); | ||||||||
2532 | for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) | ||||||||
2533 | ShuffleMask[I] = Idx; | ||||||||
2534 | V = Builder.CreateShuffleVector(V, ShuffleMask, | ||||||||
2535 | SVI.getName() + ".extract"); | ||||||||
2536 | BegIdx = 0; | ||||||||
2537 | } | ||||||||
2538 | unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; | ||||||||
2539 | assert(SrcElemsPerTgtElem); | ||||||||
2540 | BegIdx /= SrcElemsPerTgtElem; | ||||||||
2541 | bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); | ||||||||
2542 | auto *NewBC = | ||||||||
2543 | BCAlreadyExists | ||||||||
2544 | ? NewBCs[CastSrcTy] | ||||||||
2545 | : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); | ||||||||
2546 | if (!BCAlreadyExists) | ||||||||
2547 | NewBCs[CastSrcTy] = NewBC; | ||||||||
2548 | auto *Ext = Builder.CreateExtractElement( | ||||||||
2549 | NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); | ||||||||
2550 | // The shufflevector isn't being replaced: the bitcast that used it | ||||||||
2551 | // is. InstCombine will visit the newly-created instructions. | ||||||||
2552 | replaceInstUsesWith(*BC, Ext); | ||||||||
2553 | MadeChange = true; | ||||||||
2554 | } | ||||||||
2555 | } | ||||||||
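     | // Sketch of the SROA pattern above, using the [4,8) slice from the diagram
     | // (hand-written illustration, not taken from the LLVM tests):
     | //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     | //   %val  = bitcast <4 x i8> %sroa to i32
     | // -->
     | //   %bc  = bitcast <16 x i8> %in to <4 x i32>
     | //   %val = extractelement <4 x i32> %bc, i32 1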
2556 | |||||||||
2557 | // If the LHS is a shufflevector itself, see if we can combine it with this | ||||||||
2558 | // one without producing an unusual shuffle. | ||||||||
2559 | // Cases that might be simplified: | ||||||||
2560 | // 1. | ||||||||
2561 | // x1=shuffle(v1,v2,mask1) | ||||||||
2562 | // x=shuffle(x1,undef,mask) | ||||||||
2563 | // ==> | ||||||||
2564 | // x=shuffle(v1,undef,newMask) | ||||||||
2565 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 | ||||||||
2566 | // 2. | ||||||||
2567 | // x1=shuffle(v1,undef,mask1) | ||||||||
2568 | // x=shuffle(x1,x2,mask) | ||||||||
2569 | // where v1.size() == mask1.size() | ||||||||
2570 | // ==> | ||||||||
2571 | // x=shuffle(v1,x2,newMask) | ||||||||
2572 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] | ||||||||
2573 | // 3. | ||||||||
2574 | // x2=shuffle(v2,undef,mask2) | ||||||||
2575 | // x=shuffle(x1,x2,mask) | ||||||||
2576 | // where v2.size() == mask2.size() | ||||||||
2577 | // ==> | ||||||||
2578 | // x=shuffle(x1,v2,newMask) | ||||||||
2579 | // newMask[i] = (mask[i] < x1.size()) | ||||||||
2580 | // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() | ||||||||
2581 | // 4. | ||||||||
2582 | // x1=shuffle(v1,undef,mask1) | ||||||||
2583 | // x2=shuffle(v2,undef,mask2) | ||||||||
2584 | // x=shuffle(x1,x2,mask) | ||||||||
2585 | // where v1.size() == v2.size() | ||||||||
2586 | // ==> | ||||||||
2587 | // x=shuffle(v1,v2,newMask) | ||||||||
2588 | // newMask[i] = (mask[i] < x1.size()) | ||||||||
2589 | // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() | ||||||||
2590 | // | ||||||||
2591 | // Here we are really conservative: | ||||||||
2592 | // we are absolutely afraid of producing a shuffle mask not in the input | ||||||||
2593 | // program, because the code gen may not be smart enough to turn a merged | ||||||||
2594 | // shuffle into two specific shuffles: it may produce worse code. As such, | ||||||||
2595 | // we only merge two shuffles if the result is either a splat or one of the | ||||||||
2596 | // input shuffle masks. In this case, merging the shuffles just removes | ||||||||
2597 | // one instruction, which we know is safe. This is good for things like | ||||||||
2598 | // turning: (splat(splat)) -> splat, or | ||||||||
2599 | // merge(V[0..n], V[n+1..2n]) -> V[0..2n] | ||||||||
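     | // For instance (hand-written illustration of case 1 with a splat result, not
     | // taken from the LLVM sources or tests):
     | //   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
     | //   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
     | // -->
     | //   %s2 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>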
2600 | ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); | ||||||||
2601 | ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); | ||||||||
2602 | if (LHSShuffle) | ||||||||
2603 | if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef())) | ||||||||
2604 | LHSShuffle = nullptr; | ||||||||
2605 | if (RHSShuffle) | ||||||||
2606 | if (!match(RHSShuffle->getOperand(1), m_Undef())) | ||||||||
2607 | RHSShuffle = nullptr; | ||||||||
2608 | if (!LHSShuffle && !RHSShuffle) | ||||||||
2609 | return MadeChange ? &SVI : nullptr; | ||||||||
2610 | |||||||||
2611 | Value* LHSOp0 = nullptr; | ||||||||
2612 | Value* LHSOp1 = nullptr; | ||||||||
2613 | Value* RHSOp0 = nullptr; | ||||||||
2614 | unsigned LHSOp0Width = 0; | ||||||||
2615 | unsigned RHSOp0Width = 0; | ||||||||
2616 | if (LHSShuffle) { | ||||||||
2617 | LHSOp0 = LHSShuffle->getOperand(0); | ||||||||
2618 | LHSOp1 = LHSShuffle->getOperand(1); | ||||||||
2619 | LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements(); | ||||||||
2620 | } | ||||||||
2621 | if (RHSShuffle) { | ||||||||
2622 | RHSOp0 = RHSShuffle->getOperand(0); | ||||||||
2623 | RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements(); | ||||||||
2624 | } | ||||||||
2625 | Value* newLHS = LHS; | ||||||||
2626 | Value* newRHS = RHS; | ||||||||
2627 | if (LHSShuffle) { | ||||||||
2628 | // case 1 | ||||||||
2629 | if (match(RHS, m_Undef())) { | ||||||||
2630 | newLHS = LHSOp0; | ||||||||
2631 | newRHS = LHSOp1; | ||||||||
2632 | } | ||||||||
2633 | // case 2 or 4 | ||||||||
2634 | else if (LHSOp0Width == LHSWidth) { | ||||||||
2635 | newLHS = LHSOp0; | ||||||||
2636 | } | ||||||||
2637 | } | ||||||||
2638 | // case 3 or 4 | ||||||||
2639 | if (RHSShuffle && RHSOp0Width == LHSWidth) { | ||||||||
2640 | newRHS = RHSOp0; | ||||||||
2641 | } | ||||||||
2642 | // case 4 | ||||||||
2643 | if (LHSOp0 == RHSOp0) { | ||||||||
2644 | newLHS = LHSOp0; | ||||||||
2645 | newRHS = nullptr; | ||||||||
2646 | } | ||||||||
2647 | |||||||||
2648 | if (newLHS == LHS && newRHS == RHS) | ||||||||
2649 | return MadeChange ? &SVI : nullptr; | ||||||||
2650 | |||||||||
2651 | ArrayRef<int> LHSMask; | ||||||||
2652 | ArrayRef<int> RHSMask; | ||||||||
2653 | if (newLHS != LHS) | ||||||||
2654 | LHSMask = LHSShuffle->getShuffleMask(); | ||||||||
2655 | if (RHSShuffle && newRHS != RHS) | ||||||||
2656 | RHSMask = RHSShuffle->getShuffleMask(); | ||||||||
2657 | |||||||||
2658 | unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; | ||||||||
2659 | SmallVector<int, 16> newMask; | ||||||||
2660 | bool isSplat = true; | ||||||||
2661 | int SplatElt = -1; | ||||||||
2662 | // Create a new mask for the new ShuffleVectorInst so that the new | ||||||||
2663 | // ShuffleVectorInst is equivalent to the original one. | ||||||||
2664 | for (unsigned i = 0; i < VWidth; ++i) { | ||||||||
2665 | int eltMask; | ||||||||
2666 | if (Mask[i] < 0) { | ||||||||
2667 | // This element is an undef value. | ||||||||
2668 | eltMask = -1; | ||||||||
2669 | } else if (Mask[i] < (int)LHSWidth) { | ||||||||
2670 | // This element is from left hand side vector operand. | ||||||||
2671 | // | ||||||||
2672 | // If LHS is going to be replaced (case 1, 2, or 4), calculate the | ||||||||
2673 | // new mask value for the element. | ||||||||
2674 | if (newLHS != LHS) { | ||||||||
2675 | eltMask = LHSMask[Mask[i]]; | ||||||||
2676 | // If the value selected is an undef value, explicitly specify it | ||||||||
2677 | // with a -1 mask value. | ||||||||
2678 | if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) | ||||||||
2679 | eltMask = -1; | ||||||||
2680 | } else | ||||||||
2681 | eltMask = Mask[i]; | ||||||||
2682 | } else { | ||||||||
2683 | // This element is from right hand side vector operand | ||||||||
2684 | // | ||||||||
2685 | // If the value selected is an undef value, explicitly specify it | ||||||||
2686 | // with a -1 mask value. (case 1) | ||||||||
2687 | if (match(RHS, m_Undef())) | ||||||||
2688 | eltMask = -1; | ||||||||
2689 | // If RHS is going to be replaced (case 3 or 4), calculate the | ||||||||
2690 | // new mask value for the element. | ||||||||
2691 | else if (newRHS != RHS) { | ||||||||
2692 | eltMask = RHSMask[Mask[i]-LHSWidth]; | ||||||||
2693 | // If the value selected is an undef value, explicitly specify it | ||||||||
2694 | // with a -1 mask value. | ||||||||
2695 | if (eltMask >= (int)RHSOp0Width) { | ||||||||
2696 | assert(match(RHSShuffle->getOperand(1), m_Undef()) && | ||||||||
2697 | "should have been checked above"); | ||||||||
2698 | eltMask = -1; | ||||||||
2699 | } | ||||||||
2700 | } else | ||||||||
2701 | eltMask = Mask[i]-LHSWidth; | ||||||||
2702 | |||||||||
2703 | // If LHS's width is changed, shift the mask value accordingly. | ||||||||
2704 | // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any | ||||||||
2705 | // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. | ||||||||
2706 | // If newRHS == newLHS, we want to remap any references from newRHS to | ||||||||
2707 | // newLHS so that we can properly identify splats that may occur due to | ||||||||
2708 | // obfuscation across the two vectors. | ||||||||
2709 | if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) | ||||||||
2710 | eltMask += newLHSWidth; | ||||||||
2711 | } | ||||||||
2712 | |||||||||
2713 | // Check if this could still be a splat. | ||||||||
2714 | if (eltMask >= 0) { | ||||||||
2715 | if (SplatElt >= 0 && SplatElt != eltMask) | ||||||||
2716 | isSplat = false; | ||||||||
2717 | SplatElt = eltMask; | ||||||||
2718 | } | ||||||||
2719 | |||||||||
2720 | newMask.push_back(eltMask); | ||||||||
2721 | } | ||||||||
2722 | |||||||||
2723 | // If the result mask is equal to one of the original shuffle masks, | ||||||||
2724 | // or is a splat, do the replacement. | ||||||||
2725 | if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { | ||||||||
2726 | if (!newRHS) | ||||||||
2727 | newRHS = UndefValue::get(newLHS->getType()); | ||||||||
2728 | return new ShuffleVectorInst(newLHS, newRHS, newMask); | ||||||||
2729 | } | ||||||||
2730 | |||||||||
2731 | return MadeChange ? &SVI : nullptr; | ||||||||
2732 | } |
1 | //===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the declarations of classes that represent "derived |
10 | // types". These are things like "arrays of x" or "structure of x, y, z" or |
11 | // "function returning x taking (y,z) as parameters", etc... |
12 | // |
13 | // The implementations of these classes live in the Type.cpp file. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #ifndef LLVM_IR_DERIVEDTYPES_H |
18 | #define LLVM_IR_DERIVEDTYPES_H |
19 | |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/STLExtras.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/IR/Type.h" |
24 | #include "llvm/Support/Casting.h" |
25 | #include "llvm/Support/Compiler.h" |
26 | #include "llvm/Support/TypeSize.h" |
27 | #include <cassert> |
28 | #include <cstdint> |
29 | |
30 | namespace llvm { |
31 | |
32 | class Value; |
33 | class APInt; |
34 | class LLVMContext; |
35 | |
36 | /// Class to represent integer types. Note that this class is also used to |
37 | /// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and |
38 | /// Int64Ty. |
39 | /// Integer representation type |
40 | class IntegerType : public Type { |
41 | friend class LLVMContextImpl; |
42 | |
43 | protected: |
44 | explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){ |
45 | setSubclassData(NumBits); |
46 | } |
47 | |
48 | public: |
49 | /// This enum is just used to hold constants we need for IntegerType. |
50 | enum { |
51 | MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified |
52 | MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified |
53 | ///< Note that bit width is stored in the Type classes SubclassData field |
54 | ///< which has 24 bits. This yields a maximum bit width of 16,777,215 |
55 | ///< bits. |
56 | }; |
57 | |
58 | /// This static method is the primary way of constructing an IntegerType. |
59 | /// If an IntegerType with the same NumBits value was previously instantiated, |
60 | /// that instance will be returned. Otherwise a new one will be created. Only |
61 | /// one instance with a given NumBits value is ever created. |
62 | /// Get or create an IntegerType instance. |
63 | static IntegerType *get(LLVMContext &C, unsigned NumBits); |
64 | |
65 | /// Returns a type twice as wide as the input type.
66 | IntegerType *getExtendedType() const { |
67 | return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits()); |
68 | } |
69 | |
70 | /// Get the number of bits in this IntegerType |
71 | unsigned getBitWidth() const { return getSubclassData(); } |
72 | |
73 | /// Return a bitmask with ones set for all of the bits that can be set by an |
74 | /// unsigned version of this type. This is 0xFF for i8, 0xFFFF for i16, etc. |
75 | uint64_t getBitMask() const { |
76 | return ~uint64_t(0UL) >> (64-getBitWidth()); |
77 | } |
78 | |
79 | /// Return a uint64_t with just the most significant bit set (the sign bit, if |
80 | /// the value is treated as a signed number). |
81 | uint64_t getSignBit() const { |
82 | return 1ULL << (getBitWidth()-1); |
83 | } |
84 | |
85 | /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc. |
86 | /// @returns a bit mask with ones set for all the bits of this type. |
87 | /// Get a bit mask for this type. |
88 | APInt getMask() const; |
89 | |
90 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
91 | static bool classof(const Type *T) { |
92 | return T->getTypeID() == IntegerTyID; |
93 | } |
94 | }; |
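    | // Minimal usage sketch (hand-written illustration; assumes an existing
    | // LLVMContext named Ctx):
    | //   IntegerType *I16 = IntegerType::get(Ctx, 16);
    | //   uint64_t Mask = I16->getBitMask();   // 0xFFFF
    | //   uint64_t Sign = I16->getSignBit();   // 0x8000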
95 | |
96 | unsigned Type::getIntegerBitWidth() const { |
97 | return cast<IntegerType>(this)->getBitWidth(); |
98 | } |
99 | |
100 | /// Class to represent function types |
101 | /// |
102 | class FunctionType : public Type { |
103 | FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs); |
104 | |
105 | public: |
106 | FunctionType(const FunctionType &) = delete; |
107 | FunctionType &operator=(const FunctionType &) = delete; |
108 | |
109 | /// This static method is the primary way of constructing a FunctionType. |
110 | static FunctionType *get(Type *Result, |
111 | ArrayRef<Type*> Params, bool isVarArg); |
112 | |
113 | /// Create a FunctionType taking no parameters. |
114 | static FunctionType *get(Type *Result, bool isVarArg); |
115 | |
116 | /// Return true if the specified type is valid as a return type. |
117 | static bool isValidReturnType(Type *RetTy); |
118 | |
119 | /// Return true if the specified type is valid as an argument type. |
120 | static bool isValidArgumentType(Type *ArgTy); |
121 | |
122 | bool isVarArg() const { return getSubclassData()!=0; } |
123 | Type *getReturnType() const { return ContainedTys[0]; } |
124 | |
125 | using param_iterator = Type::subtype_iterator; |
126 | |
127 | param_iterator param_begin() const { return ContainedTys + 1; } |
128 | param_iterator param_end() const { return &ContainedTys[NumContainedTys]; } |
129 | ArrayRef<Type *> params() const { |
130 | return makeArrayRef(param_begin(), param_end()); |
131 | } |
132 | |
133 | /// Parameter type accessors. |
134 | Type *getParamType(unsigned i) const { return ContainedTys[i+1]; } |
135 | |
136 | /// Return the number of fixed parameters this function type requires. |
137 | /// This does not consider varargs. |
138 | unsigned getNumParams() const { return NumContainedTys - 1; } |
139 | |
140 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
141 | static bool classof(const Type *T) { |
142 | return T->getTypeID() == FunctionTyID; |
143 | } |
144 | }; |
145 | static_assert(alignof(FunctionType) >= alignof(Type *), |
146 | "Alignment sufficient for objects appended to FunctionType"); |
147 | |
148 | bool Type::isFunctionVarArg() const { |
149 | return cast<FunctionType>(this)->isVarArg(); |
150 | } |
151 | |
152 | Type *Type::getFunctionParamType(unsigned i) const { |
153 | return cast<FunctionType>(this)->getParamType(i); |
154 | } |
155 | |
156 | unsigned Type::getFunctionNumParams() const { |
157 | return cast<FunctionType>(this)->getNumParams(); |
158 | } |
159 | |
160 | /// A handy container for a FunctionType+Callee-pointer pair, which can be |
161 | /// passed around as a single entity. This assists in replacing the use of |
162 | /// PointerType::getElementType() to access the function's type, since that's |
163 | /// slated for removal as part of the [opaque pointer types] project. |
164 | class FunctionCallee { |
165 | public: |
166 | // Allow implicit conversion from types which have a getFunctionType member |
167 | // (e.g. Function and InlineAsm). |
168 | template <typename T, typename U = decltype(&T::getFunctionType)> |
169 | FunctionCallee(T *Fn) |
170 | : FnTy(Fn ? Fn->getFunctionType() : nullptr), Callee(Fn) {} |
171 | |
172 | FunctionCallee(FunctionType *FnTy, Value *Callee) |
173 | : FnTy(FnTy), Callee(Callee) { |
174 | assert((FnTy == nullptr) == (Callee == nullptr));
175 | } |
176 | |
177 | FunctionCallee(std::nullptr_t) {} |
178 | |
179 | FunctionCallee() = default; |
180 | |
181 | FunctionType *getFunctionType() { return FnTy; } |
182 | |
183 | Value *getCallee() { return Callee; } |
184 | |
185 | explicit operator bool() { return Callee; } |
186 | |
187 | private: |
188 | FunctionType *FnTy = nullptr; |
189 | Value *Callee = nullptr; |
190 | }; |
191 | |
192 | /// Class to represent struct types. There are two different kinds of struct |
193 | /// types: Literal structs and Identified structs. |
194 | /// |
195 | /// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must |
196 | /// always have a body when created. You can get one of these by using one of |
197 | /// the StructType::get() forms. |
198 | /// |
199 | /// Identified structs (e.g. %foo or %42) may optionally have a name and are not |
200 | /// uniqued. The names for identified structs are managed at the LLVMContext |
201 | /// level, so there can only be a single identified struct with a given name in |
202 | /// a particular LLVMContext. Identified structs may also optionally be opaque |
203 | /// (have no body specified). You get one of these by using one of the |
204 | /// StructType::create() forms. |
205 | /// |
206 | /// Independent of what kind of struct you have, the body of a struct type is
207 | /// laid out in memory consecutively with the elements directly one after the |
208 | /// other (if the struct is packed) or (if not packed) with padding between the |
209 | /// elements as defined by DataLayout (which is required to match what the code |
210 | /// generator for a target expects). |
211 | /// |
212 | class StructType : public Type { |
213 | StructType(LLVMContext &C) : Type(C, StructTyID) {} |
214 | |
215 | enum { |
216 | /// This is the contents of the SubClassData field. |
217 | SCDB_HasBody = 1, |
218 | SCDB_Packed = 2, |
219 | SCDB_IsLiteral = 4, |
220 | SCDB_IsSized = 8 |
221 | }; |
222 | |
223 | /// For a named struct that actually has a name, this is a pointer to the |
224 | /// symbol table entry (maintained by LLVMContext) for the struct. |
225 | /// This is null if the type is a literal struct or if it is an identified
226 | /// type that has an empty name. |
227 | void *SymbolTableEntry = nullptr; |
228 | |
229 | public: |
230 | StructType(const StructType &) = delete; |
231 | StructType &operator=(const StructType &) = delete; |
232 | |
233 | /// This creates an identified struct. |
234 | static StructType *create(LLVMContext &Context, StringRef Name); |
235 | static StructType *create(LLVMContext &Context); |
236 | |
237 | static StructType *create(ArrayRef<Type *> Elements, StringRef Name, |
238 | bool isPacked = false); |
239 | static StructType *create(ArrayRef<Type *> Elements); |
240 | static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements, |
241 | StringRef Name, bool isPacked = false); |
242 | static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements); |
243 | template <class... Tys> |
244 | static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *> |
245 | create(StringRef Name, Type *elt1, Tys *... elts) { |
246 | assert(elt1 && "Cannot create a struct type with no elements with this");
247 | return create(ArrayRef<Type *>({elt1, elts...}), Name); |
248 | } |
249 | |
250 | /// This static method is the primary way to create a literal StructType. |
251 | static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements, |
252 | bool isPacked = false); |
253 | |
254 | /// Create an empty structure type. |
255 | static StructType *get(LLVMContext &Context, bool isPacked = false); |
256 | |
257 | /// This static method is a convenience method for creating structure types by |
258 | /// specifying the elements as arguments. Note that this method always returns |
259 | /// a non-packed struct, and requires at least one element type. |
260 | template <class... Tys> |
261 | static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *> |
262 | get(Type *elt1, Tys *... elts) { |
263 | assert(elt1 && "Cannot create a struct type with no elements with this");
264 | LLVMContext &Ctx = elt1->getContext(); |
265 | return StructType::get(Ctx, ArrayRef<Type *>({elt1, elts...})); |
266 | } |
267 | |
268 | /// Return the type with the specified name, or null if there is none by that |
269 | /// name. |
270 | static StructType *getTypeByName(LLVMContext &C, StringRef Name); |
271 | |
272 | bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } |
273 | |
274 | /// Return true if this type is uniqued by structural equivalence, false if it |
275 | /// is a struct definition. |
276 | bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; } |
277 | |
278 | /// Return true if this is a type with an identity that has no body specified |
279 | /// yet. These print as 'opaque' in .ll files.
280 | bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; } |
281 | |
282 | /// isSized - Return true if this is a sized type. |
283 | bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const; |
284 | |
285 | /// Returns true if this struct contains a scalable vector. |
286 | bool containsScalableVectorType() const; |
287 | |
288 | /// Return true if this is a named struct that has a non-empty name. |
289 | bool hasName() const { return SymbolTableEntry != nullptr; } |
290 | |
291 | /// Return the name for this struct type if it has an identity. |
292 | /// This may return an empty string for an unnamed struct type. Do not call |
293 | /// this on a literal type.
294 | StringRef getName() const; |
295 | |
296 | /// Change the name of this type to the specified name, or to a name with a |
297 | /// suffix if there is a collision. Do not call this on a literal type.
298 | void setName(StringRef Name); |
299 | |
300 | /// Specify a body for an opaque identified type. |
301 | void setBody(ArrayRef<Type*> Elements, bool isPacked = false); |
302 | |
303 | template <typename... Tys> |
304 | std::enable_if_t<are_base_of<Type, Tys...>::value, void> |
305 | setBody(Type *elt1, Tys *... elts) { |
306 | assert(elt1 && "Cannot create a struct type with no elements with this");
307 | setBody(ArrayRef<Type *>({elt1, elts...})); |
308 | } |
309 | |
310 | /// Return true if the specified type is valid as an element type.
311 | static bool isValidElementType(Type *ElemTy); |
312 | |
313 | // Iterator access to the elements. |
314 | using element_iterator = Type::subtype_iterator; |
315 | |
316 | element_iterator element_begin() const { return ContainedTys; } |
317 | element_iterator element_end() const { return &ContainedTys[NumContainedTys];} |
318 | ArrayRef<Type *> elements() const { |
319 | return makeArrayRef(element_begin(), element_end()); |
320 | } |
321 | |
322 | /// Return true if this is layout identical to the specified struct. |
323 | bool isLayoutIdentical(StructType *Other) const; |
324 | |
325 | /// Random access to the elements |
326 | unsigned getNumElements() const { return NumContainedTys; } |
327 | Type *getElementType(unsigned N) const { |
328 | assert(N < NumContainedTys && "Element number out of range!");
329 | return ContainedTys[N]; |
330 | } |
331 | /// Given an index value into the type, return the type of the element. |
332 | Type *getTypeAtIndex(const Value *V) const; |
333 | Type *getTypeAtIndex(unsigned N) const { return getElementType(N); } |
334 | bool indexValid(const Value *V) const; |
335 | bool indexValid(unsigned Idx) const { return Idx < getNumElements(); } |
336 | |
337 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
338 | static bool classof(const Type *T) { |
339 | return T->getTypeID() == StructTyID; |
340 | } |
341 | }; |
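    | // Minimal usage sketch (hand-written illustration; Ctx is an assumed LLVMContext):
    | //   Type *I32 = Type::getInt32Ty(Ctx);
    | //   StructType *Literal = StructType::get(Ctx, {I32, I32});    // uniqued literal { i32, i32 }
    | //   StructType *Named   = StructType::create(Ctx, "my.pair");  // opaque until a body is set
    | //   Named->setBody({I32, I32});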
342 | |
343 | StringRef Type::getStructName() const { |
344 | return cast<StructType>(this)->getName(); |
345 | } |
346 | |
347 | unsigned Type::getStructNumElements() const { |
348 | return cast<StructType>(this)->getNumElements(); |
349 | } |
350 | |
351 | Type *Type::getStructElementType(unsigned N) const { |
352 | return cast<StructType>(this)->getElementType(N); |
353 | } |
354 | |
355 | /// Class to represent array types. |
356 | class ArrayType : public Type { |
357 | /// The element type of the array. |
358 | Type *ContainedType; |
359 | /// Number of elements in the array. |
360 | uint64_t NumElements; |
361 | |
362 | ArrayType(Type *ElType, uint64_t NumEl); |
363 | |
364 | public: |
365 | ArrayType(const ArrayType &) = delete; |
366 | ArrayType &operator=(const ArrayType &) = delete; |
367 | |
368 | uint64_t getNumElements() const { return NumElements; } |
369 | Type *getElementType() const { return ContainedType; } |
370 | |
371 | /// This static method is the primary way to construct an ArrayType |
372 | static ArrayType *get(Type *ElementType, uint64_t NumElements); |
373 | |
374 | /// Return true if the specified type is valid as an element type.
375 | static bool isValidElementType(Type *ElemTy); |
376 | |
377 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
378 | static bool classof(const Type *T) { |
379 | return T->getTypeID() == ArrayTyID; |
380 | } |
381 | }; |
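    | // Usage sketch (hand-written illustration; Ctx is an assumed LLVMContext): [16 x i8]
    | //   ArrayType *A = ArrayType::get(Type::getInt8Ty(Ctx), 16);
    | //   uint64_t N = A->getNumElements();   // 16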
382 | |
383 | uint64_t Type::getArrayNumElements() const { |
384 | return cast<ArrayType>(this)->getNumElements(); |
385 | } |
386 | |
387 | /// Base class of all SIMD vector types |
388 | class VectorType : public Type { |
389 | /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the |
390 | /// minimum number of elements of type Ty contained within the vector, and |
391 | /// 'vscale x' indicates that the total element count is an integer multiple |
392 | /// of 'n', where the multiple is either guaranteed to be one, or is |
393 | /// statically unknown at compile time. |
394 | /// |
395 | /// If the multiple is known to be 1, then the extra term is discarded in |
396 | /// textual IR: |
397 | /// |
398 | /// <4 x i32> - a vector containing 4 i32s |
399 | /// <vscale x 4 x i32> - a vector containing an unknown integer multiple |
400 | /// of 4 i32s |
401 | |
402 | /// The element type of the vector. |
403 | Type *ContainedType; |
404 | |
405 | protected: |
406 | /// The element quantity of this vector. The meaning of this value depends |
407 | /// on the type of vector: |
408 | /// - For FixedVectorType = <ElementQuantity x ty>, there are |
409 | /// exactly ElementQuantity elements in this vector. |
410 | /// - For ScalableVectorType = <vscale x ElementQuantity x ty>, |
411 | /// there are vscale * ElementQuantity elements in this vector, where |
412 | /// vscale is a runtime-constant integer greater than 0. |
413 | const unsigned ElementQuantity; |
414 | |
415 | VectorType(Type *ElType, unsigned EQ, Type::TypeID TID); |
416 | |
417 | public: |
418 | VectorType(const VectorType &) = delete; |
419 | VectorType &operator=(const VectorType &) = delete; |
420 | |
421 | Type *getElementType() const { return ContainedType; } |
422 | |
423 | /// This static method is the primary way to construct an VectorType. |
424 | static VectorType *get(Type *ElementType, ElementCount EC); |
425 | |
426 | static VectorType *get(Type *ElementType, unsigned NumElements, |
427 | bool Scalable) { |
428 | return VectorType::get(ElementType, |
429 | ElementCount::get(NumElements, Scalable)); |
430 | } |
431 | |
432 | static VectorType *get(Type *ElementType, const VectorType *Other) { |
433 | return VectorType::get(ElementType, Other->getElementCount()); |
434 | } |
435 | |
436 | /// This static method gets a VectorType with the same number of elements as |
437 | /// the input type, and the element type is an integer type of the same width |
438 | /// as the input element type. |
439 | static VectorType *getInteger(VectorType *VTy) { |
440 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
441 | assert(EltBits && "Element size must be of a non-zero size");
442 | Type *EltTy = IntegerType::get(VTy->getContext(), EltBits); |
443 | return VectorType::get(EltTy, VTy->getElementCount()); |
444 | } |
445 | |
446 | /// This static method is like getInteger except that the element types are |
447 | /// twice as wide as the elements in the input type. |
448 | static VectorType *getExtendedElementVectorType(VectorType *VTy) { |
449 | assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints.");
450 | auto *EltTy = cast<IntegerType>(VTy->getElementType()); |
451 | return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount()); |
452 | } |
453 | |
454 | // This static method gets a VectorType with the same number of elements as |
455 | // the input type, and the element type is an integer or float type which |
456 | // is half as wide as the elements in the input type. |
457 | static VectorType *getTruncatedElementVectorType(VectorType *VTy) { |
458 | Type *EltTy; |
459 | if (VTy->getElementType()->isFloatingPointTy()) { |
460 | switch(VTy->getElementType()->getTypeID()) { |
461 | case DoubleTyID: |
462 | EltTy = Type::getFloatTy(VTy->getContext()); |
463 | break; |
464 | case FloatTyID: |
465 | EltTy = Type::getHalfTy(VTy->getContext()); |
466 | break; |
467 | default: |
468 | llvm_unreachable("Cannot create narrower fp vector element type");
469 | } |
470 | } else { |
471 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
472 | assert((EltBits & 1) == 0 &&
473 | "Cannot truncate vector element with odd bit-width");
474 | EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); |
475 | } |
476 | return VectorType::get(EltTy, VTy->getElementCount()); |
477 | } |
478 | |
479 | // This static method returns a VectorType with a smaller number of elements |
480 | // of a larger type than the input element type. For example, a <16 x i8> |
481 | // subdivided twice would return <4 x i32> |
482 | static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) { |
483 | for (int i = 0; i < NumSubdivs; ++i) { |
484 | VTy = VectorType::getDoubleElementsVectorType(VTy); |
485 | VTy = VectorType::getTruncatedElementVectorType(VTy); |
486 | } |
487 | return VTy; |
488 | } |
489 | |
490 | /// This static method returns a VectorType with half as many elements as the |
491 | /// input type and the same element type. |
492 | static VectorType *getHalfElementsVectorType(VectorType *VTy) { |
493 | auto EltCnt = VTy->getElementCount(); |
494 | assert(EltCnt.isKnownEven() &&((void)0) |
495 | "Cannot halve vector with odd number of elements.")((void)0); |
496 | return VectorType::get(VTy->getElementType(), |
497 | EltCnt.divideCoefficientBy(2)); |
498 | } |
499 | |
500 | /// This static method returns a VectorType with twice as many elements as the |
501 | /// input type and the same element type. |
502 | static VectorType *getDoubleElementsVectorType(VectorType *VTy) { |
503 | auto EltCnt = VTy->getElementCount(); |
504 | assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX &&((void)0) |
505 | "Too many elements in vector")((void)0); |
506 | return VectorType::get(VTy->getElementType(), EltCnt * 2); |
507 | } |
508 | |
509 | /// Return true if the specified type is valid as an element type. |
510 | static bool isValidElementType(Type *ElemTy); |
511 | |
512 | /// Return an ElementCount instance to represent the (possibly scalable) |
513 | /// number of elements in the vector. |
514 | inline ElementCount getElementCount() const; |
515 | |
516 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
517 | static bool classof(const Type *T) { |
518 | return T->getTypeID() == FixedVectorTyID || |
519 | T->getTypeID() == ScalableVectorTyID; |
520 | } |
521 | }; |
522 | |
523 | /// Class to represent fixed width SIMD vectors |
524 | class FixedVectorType : public VectorType { |
525 | protected: |
526 | FixedVectorType(Type *ElTy, unsigned NumElts) |
527 | : VectorType(ElTy, NumElts, FixedVectorTyID) {} |
528 | |
529 | public: |
530 | static FixedVectorType *get(Type *ElementType, unsigned NumElts); |
531 | |
532 | static FixedVectorType *get(Type *ElementType, const FixedVectorType *FVTy) { |
533 | return get(ElementType, FVTy->getNumElements()); |
534 | } |
535 | |
536 | static FixedVectorType *getInteger(FixedVectorType *VTy) { |
537 | return cast<FixedVectorType>(VectorType::getInteger(VTy)); |
538 | } |
539 | |
540 | static FixedVectorType *getExtendedElementVectorType(FixedVectorType *VTy) { |
541 | return cast<FixedVectorType>(VectorType::getExtendedElementVectorType(VTy)); |
542 | } |
543 | |
544 | static FixedVectorType *getTruncatedElementVectorType(FixedVectorType *VTy) { |
545 | return cast<FixedVectorType>( |
546 | VectorType::getTruncatedElementVectorType(VTy)); |
547 | } |
548 | |
549 | static FixedVectorType *getSubdividedVectorType(FixedVectorType *VTy, |
550 | int NumSubdivs) { |
551 | return cast<FixedVectorType>( |
552 | VectorType::getSubdividedVectorType(VTy, NumSubdivs)); |
553 | } |
554 | |
555 | static FixedVectorType *getHalfElementsVectorType(FixedVectorType *VTy) { |
556 | return cast<FixedVectorType>(VectorType::getHalfElementsVectorType(VTy)); |
557 | } |
558 | |
559 | static FixedVectorType *getDoubleElementsVectorType(FixedVectorType *VTy) { |
560 | return cast<FixedVectorType>(VectorType::getDoubleElementsVectorType(VTy)); |
561 | } |
562 | |
563 | static bool classof(const Type *T) { |
564 | return T->getTypeID() == FixedVectorTyID; |
565 | } |
566 | |
567 | unsigned getNumElements() const { return ElementQuantity; } |
568 | }; |
569 | |
570 | /// Class to represent scalable SIMD vectors |
571 | class ScalableVectorType : public VectorType { |
572 | protected: |
573 | ScalableVectorType(Type *ElTy, unsigned MinNumElts) |
574 | : VectorType(ElTy, MinNumElts, ScalableVectorTyID) {} |
575 | |
576 | public: |
577 | static ScalableVectorType *get(Type *ElementType, unsigned MinNumElts); |
578 | |
579 | static ScalableVectorType *get(Type *ElementType, |
580 | const ScalableVectorType *SVTy) { |
581 | return get(ElementType, SVTy->getMinNumElements()); |
582 | } |
583 | |
584 | static ScalableVectorType *getInteger(ScalableVectorType *VTy) { |
585 | return cast<ScalableVectorType>(VectorType::getInteger(VTy)); |
586 | } |
587 | |
588 | static ScalableVectorType * |
589 | getExtendedElementVectorType(ScalableVectorType *VTy) { |
590 | return cast<ScalableVectorType>( |
591 | VectorType::getExtendedElementVectorType(VTy)); |
592 | } |
593 | |
594 | static ScalableVectorType * |
595 | getTruncatedElementVectorType(ScalableVectorType *VTy) { |
596 | return cast<ScalableVectorType>( |
597 | VectorType::getTruncatedElementVectorType(VTy)); |
598 | } |
599 | |
600 | static ScalableVectorType *getSubdividedVectorType(ScalableVectorType *VTy, |
601 | int NumSubdivs) { |
602 | return cast<ScalableVectorType>( |
603 | VectorType::getSubdividedVectorType(VTy, NumSubdivs)); |
604 | } |
605 | |
606 | static ScalableVectorType * |
607 | getHalfElementsVectorType(ScalableVectorType *VTy) { |
608 | return cast<ScalableVectorType>(VectorType::getHalfElementsVectorType(VTy)); |
609 | } |
610 | |
611 | static ScalableVectorType * |
612 | getDoubleElementsVectorType(ScalableVectorType *VTy) { |
613 | return cast<ScalableVectorType>( |
614 | VectorType::getDoubleElementsVectorType(VTy)); |
615 | } |
616 | |
617 | /// Get the minimum number of elements in this vector. The actual number of |
618 | /// elements in the vector is an integer multiple of this value. |
619 | uint64_t getMinNumElements() const { return ElementQuantity; } |
620 | |
621 | static bool classof(const Type *T) { |
622 | return T->getTypeID() == ScalableVectorTyID; |
623 | } |
624 | }; |
625 | |
626 | inline ElementCount VectorType::getElementCount() const { |
627 | return ElementCount::get(ElementQuantity, isa<ScalableVectorType>(this)); |
628 | } |
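An illustrative sketch of the ElementCount-based factory above; the function name and the LLVMContext Ctx are assumptions made for the example:

// Sketch only: constructing <4 x float> and <vscale x 4 x float>.
void buildVectorTypes(llvm::LLVMContext &Ctx) {
  llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
  // Element count known at compile time: <4 x float>.
  auto *Fixed = llvm::VectorType::get(F32, llvm::ElementCount::getFixed(4));
  // Known minimum of 4 elements, scaled by vscale at runtime: <vscale x 4 x float>.
  auto *Scalable = llvm::VectorType::get(F32, llvm::ElementCount::getScalable(4));
  (void)Fixed->getElementCount();     // {4, fixed}
  (void)Scalable->getElementCount();  // {4, scalable}
}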
629 | |
630 | /// Class to represent pointers. |
631 | class PointerType : public Type { |
632 | explicit PointerType(Type *ElType, unsigned AddrSpace); |
633 | explicit PointerType(LLVMContext &C, unsigned AddrSpace); |
634 | |
635 | Type *PointeeTy; |
636 | |
637 | public: |
638 | PointerType(const PointerType &) = delete; |
639 | PointerType &operator=(const PointerType &) = delete; |
640 | |
641 | /// This constructs a pointer to an object of the specified type in a numbered |
642 | /// address space. |
643 | static PointerType *get(Type *ElementType, unsigned AddressSpace); |
644 | /// This constructs an opaque pointer to an object in a numbered address |
645 | /// space. |
646 | static PointerType *get(LLVMContext &C, unsigned AddressSpace); |
647 | |
648 | /// This constructs a pointer to an object of the specified type in the |
649 | /// default address space (address space zero). |
650 | static PointerType *getUnqual(Type *ElementType) { |
651 | return PointerType::get(ElementType, 0); |
652 | } |
653 | |
654 | /// This constructs an opaque pointer to an object in the |
655 | /// default address space (address space zero). |
656 | static PointerType *getUnqual(LLVMContext &C) { |
657 | return PointerType::get(C, 0); |
658 | } |
659 | |
660 | /// This constructs a pointer type with the same pointee type as the input |
661 | /// PointerType (or an opaque pointer if the input PointerType is opaque) and the |
662 | /// given address space. This is only useful during the opaque pointer |
663 | /// transition. |
664 | /// TODO: remove after opaque pointer transition is complete. |
665 | static PointerType *getWithSamePointeeType(PointerType *PT, |
666 | unsigned AddressSpace) { |
667 | if (PT->isOpaque()) |
668 | return get(PT->getContext(), AddressSpace); |
669 | return get(PT->getElementType(), AddressSpace); |
670 | } |
671 | |
672 | Type *getElementType() const { |
673 | assert(!isOpaque() && "Attempting to get element type of opaque pointer")((void)0); |
674 | return PointeeTy; |
675 | } |
676 | |
677 | bool isOpaque() const { return !PointeeTy; } |
678 | |
679 | /// Return true if the specified type is valid as an element type. |
680 | static bool isValidElementType(Type *ElemTy); |
681 | |
682 | /// Return true if we can load or store from a pointer to this type. |
683 | static bool isLoadableOrStorableType(Type *ElemTy); |
684 | |
685 | /// Return the address space of the Pointer type. |
686 | inline unsigned getAddressSpace() const { return getSubclassData(); } |
687 | |
688 | /// Return true if either this is an opaque pointer type or if this pointee |
689 | /// type matches Ty. Primarily used for checking if an instruction's pointer |
690 | /// operands are valid types. Will be useless after non-opaque pointers are |
691 | /// removed. |
692 | bool isOpaqueOrPointeeTypeMatches(Type *Ty) { |
693 | return isOpaque() || PointeeTy == Ty; |
694 | } |
695 | |
696 | /// Return true if both pointer types have the same element type. Two opaque |
697 | /// pointers are considered to have the same element type, while an opaque |
698 | /// and a non-opaque pointer have different element types. |
699 | /// TODO: Remove after opaque pointer transition is complete. |
700 | bool hasSameElementTypeAs(PointerType *Other) { |
701 | return PointeeTy == Other->PointeeTy; |
702 | } |
703 | |
704 | /// Implement support type inquiry through isa, cast, and dyn_cast. |
705 | static bool classof(const Type *T) { |
706 | return T->getTypeID() == PointerTyID; |
707 | } |
708 | }; |
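An illustrative sketch of the typed and opaque pointer constructors above; the function name and the context Ctx are assumptions for the example:

void pointerTypeSketch(llvm::LLVMContext &Ctx) {
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  // Typed i8* in address space 0, and an opaque pointer in address space 1.
  llvm::PointerType *Typed = llvm::PointerType::getUnqual(I8);
  llvm::PointerType *Opaque = llvm::PointerType::get(Ctx, 1);
  // A typed pointer only matches its own pointee; an opaque pointer matches any type.
  bool MatchesTyped = Typed->isOpaqueOrPointeeTypeMatches(I8);   // true
  bool MatchesOpaque = Opaque->isOpaqueOrPointeeTypeMatches(I8); // true
  (void)MatchesTyped; (void)MatchesOpaque;
}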
709 | |
710 | Type *Type::getExtendedType() const { |
711 | assert(((void)0) |
712 | isIntOrIntVectorTy() &&((void)0) |
713 | "Original type expected to be a vector of integers or a scalar integer.")((void)0); |
714 | if (auto *VTy = dyn_cast<VectorType>(this)) |
715 | return VectorType::getExtendedElementVectorType( |
716 | const_cast<VectorType *>(VTy)); |
717 | return cast<IntegerType>(this)->getExtendedType(); |
718 | } |
719 | |
720 | Type *Type::getWithNewType(Type *EltTy) const { |
721 | if (auto *VTy = dyn_cast<VectorType>(this)) |
722 | return VectorType::get(EltTy, VTy->getElementCount()); |
723 | return EltTy; |
724 | } |
725 | |
726 | Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const { |
727 | assert(((void)0) |
728 | isIntOrIntVectorTy() &&((void)0) |
729 | "Original type expected to be a vector of integers or a scalar integer.")((void)0); |
730 | return getWithNewType(getIntNTy(getContext(), NewBitWidth)); |
731 | } |
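A small sketch of getWithNewBitWidth on a scalar and on a vector type; the function name and Ctx are assumptions for the example:

void widenToI32(llvm::LLVMContext &Ctx) {
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  llvm::Type *V4I8 = llvm::FixedVectorType::get(I8, 4);
  llvm::Type *I32 = I8->getWithNewBitWidth(32);     // i32
  llvm::Type *V4I32 = V4I8->getWithNewBitWidth(32); // <4 x i32>: element count preserved
  (void)I32; (void)V4I32;
}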
732 | |
733 | unsigned Type::getPointerAddressSpace() const { |
734 | return cast<PointerType>(getScalarType())->getAddressSpace(); |
735 | } |
736 | |
737 | } // end namespace llvm |
738 | |
739 | #endif // LLVM_IR_DERIVEDTYPES_H |
1 | //===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This file provides a struct that can be used to query the size of IR types | ||||||||
10 | // which may be scalable vectors. It provides convenience operators so that | ||||||||
11 | // it can be used in much the same way as a single scalar value. | ||||||||
12 | // | ||||||||
13 | //===----------------------------------------------------------------------===// | ||||||||
14 | |||||||||
15 | #ifndef LLVM_SUPPORT_TYPESIZE_H | ||||||||
16 | #define LLVM_SUPPORT_TYPESIZE_H | ||||||||
17 | |||||||||
18 | #include "llvm/ADT/ArrayRef.h" | ||||||||
19 | #include "llvm/Support/MathExtras.h" | ||||||||
20 | #include "llvm/Support/WithColor.h" | ||||||||
21 | |||||||||
22 | #include <algorithm> | ||||||||
23 | #include <array> | ||||||||
24 | #include <cassert> | ||||||||
25 | #include <cstdint> | ||||||||
26 | #include <type_traits> | ||||||||
27 | |||||||||
28 | namespace llvm { | ||||||||
29 | |||||||||
30 | /// Reports a diagnostic message to indicate an invalid size request has been | ||||||||
31 | /// made on a scalable vector. This function may not return. | ||||||||
32 | void reportInvalidSizeRequest(const char *Msg); | ||||||||
33 | |||||||||
34 | template <typename LeafTy> struct LinearPolyBaseTypeTraits {}; | ||||||||
35 | |||||||||
36 | //===----------------------------------------------------------------------===// | ||||||||
37 | // LinearPolyBase - a base class for linear polynomials with multiple | ||||||||
38 | // dimensions. This can e.g. be used to describe offsets that have both a | ||||||||
39 | // fixed and scalable component. | ||||||||
40 | //===----------------------------------------------------------------------===// | ||||||||
41 | |||||||||
42 | /// LinearPolyBase describes a linear polynomial: | ||||||||
43 | /// c0 * scale0 + c1 * scale1 + ... + cK * scaleK | ||||||||
44 | /// where the scale is implicit, so only the coefficients are encoded. | ||||||||
45 | template <typename LeafTy> | ||||||||
46 | class LinearPolyBase { | ||||||||
47 | public: | ||||||||
48 | using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; | ||||||||
49 | static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; | ||||||||
50 | static_assert(Dimensions != std::numeric_limits<unsigned>::max(), | ||||||||
51 | "Dimensions out of range"); | ||||||||
52 | |||||||||
53 | private: | ||||||||
54 | std::array<ScalarTy, Dimensions> Coefficients; | ||||||||
55 | |||||||||
56 | protected: | ||||||||
57 | LinearPolyBase(ArrayRef<ScalarTy> Values) { | ||||||||
58 | std::copy(Values.begin(), Values.end(), Coefficients.begin()); | ||||||||
59 | } | ||||||||
60 | |||||||||
61 | public: | ||||||||
62 | friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
63 | for (unsigned I=0; I<Dimensions; ++I) | ||||||||
64 | LHS.Coefficients[I] += RHS.Coefficients[I]; | ||||||||
65 | return LHS; | ||||||||
66 | } | ||||||||
67 | |||||||||
68 | friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
69 | for (unsigned I=0; I<Dimensions; ++I) | ||||||||
70 | LHS.Coefficients[I] -= RHS.Coefficients[I]; | ||||||||
71 | return LHS; | ||||||||
72 | } | ||||||||
73 | |||||||||
74 | friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { | ||||||||
75 | for (auto &C : LHS.Coefficients) | ||||||||
76 | C *= RHS; | ||||||||
77 | return LHS; | ||||||||
78 | } | ||||||||
79 | |||||||||
80 | friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
81 | LeafTy Copy = LHS; | ||||||||
82 | return Copy += RHS; | ||||||||
83 | } | ||||||||
84 | |||||||||
85 | friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
86 | LeafTy Copy = LHS; | ||||||||
87 | return Copy -= RHS; | ||||||||
88 | } | ||||||||
89 | |||||||||
90 | friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { | ||||||||
91 | LeafTy Copy = LHS; | ||||||||
92 | return Copy *= RHS; | ||||||||
93 | } | ||||||||
94 | |||||||||
95 | template <typename U = ScalarTy> | ||||||||
96 | friend typename std::enable_if_t<std::is_signed<U>::value, LeafTy> | ||||||||
97 | operator-(const LeafTy &LHS) { | ||||||||
98 | LeafTy Copy = LHS; | ||||||||
99 | return Copy *= -1; | ||||||||
100 | } | ||||||||
101 | |||||||||
102 | bool operator==(const LinearPolyBase &RHS) const { | ||||||||
103 | return std::equal(Coefficients.begin(), Coefficients.end(), | ||||||||
104 | RHS.Coefficients.begin()); | ||||||||
105 | } | ||||||||
106 | |||||||||
107 | bool operator!=(const LinearPolyBase &RHS) const { | ||||||||
108 | return !(*this == RHS); | ||||||||
109 | } | ||||||||
110 | |||||||||
111 | bool isZero() const { | ||||||||
112 | return all_of(Coefficients, [](const ScalarTy &C) { return C == 0; }); | ||||||||
113 | } | ||||||||
114 | bool isNonZero() const { return !isZero(); } | ||||||||
115 | explicit operator bool() const { return isNonZero(); } | ||||||||
116 | |||||||||
117 | ScalarTy getValue(unsigned Dim) const { return Coefficients[Dim]; } | ||||||||
118 | }; | ||||||||
119 | |||||||||
120 | //===----------------------------------------------------------------------===// | ||||||||
121 | // StackOffset - Represent an offset with named fixed and scalable components. | ||||||||
122 | //===----------------------------------------------------------------------===// | ||||||||
123 | |||||||||
124 | class StackOffset; | ||||||||
125 | template <> struct LinearPolyBaseTypeTraits<StackOffset> { | ||||||||
126 | using ScalarTy = int64_t; | ||||||||
127 | static constexpr unsigned Dimensions = 2; | ||||||||
128 | }; | ||||||||
129 | |||||||||
130 | /// StackOffset is a class to represent an offset with 2 dimensions, | ||||||||
131 | /// named fixed and scalable, respectively. This class allows a value for both | ||||||||
132 | /// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed | ||||||||
133 | /// to represent stack offsets. | ||||||||
134 | class StackOffset : public LinearPolyBase<StackOffset> { | ||||||||
135 | protected: | ||||||||
136 | StackOffset(ScalarTy Fixed, ScalarTy Scalable) | ||||||||
137 | : LinearPolyBase<StackOffset>({Fixed, Scalable}) {} | ||||||||
138 | |||||||||
139 | public: | ||||||||
140 | StackOffset() : StackOffset({0, 0}) {} | ||||||||
141 | StackOffset(const LinearPolyBase<StackOffset> &Other) | ||||||||
142 | : LinearPolyBase<StackOffset>(Other) {} | ||||||||
143 | static StackOffset getFixed(ScalarTy Fixed) { return {Fixed, 0}; } | ||||||||
144 | static StackOffset getScalable(ScalarTy Scalable) { return {0, Scalable}; } | ||||||||
145 | static StackOffset get(ScalarTy Fixed, ScalarTy Scalable) { | ||||||||
146 | return {Fixed, Scalable}; | ||||||||
147 | } | ||||||||
148 | |||||||||
149 | ScalarTy getFixed() const { return this->getValue(0); } | ||||||||
150 | ScalarTy getScalable() const { return this->getValue(1); } | ||||||||
151 | }; | ||||||||
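A brief sketch of how the named StackOffset components compose; the function name is an assumption for the example, and the addition operator comes from LinearPolyBase above:

void stackOffsetSketch() {
  // "8 fixed bytes plus 16 scalable bytes".
  llvm::StackOffset Off =
      llvm::StackOffset::getFixed(8) + llvm::StackOffset::getScalable(16);
  int64_t FixedPart = Off.getFixed();       // 8
  int64_t ScalablePart = Off.getScalable(); // 16
  (void)FixedPart; (void)ScalablePart;
}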
152 | |||||||||
153 | //===----------------------------------------------------------------------===// | ||||||||
154 | // UnivariateLinearPolyBase - a base class for linear polynomials with multiple | ||||||||
155 | // dimensions, but where only one dimension can be set at any time. | ||||||||
156 | // This can e.g. be used to describe sizes that are either fixed or scalable. | ||||||||
157 | //===----------------------------------------------------------------------===// | ||||||||
158 | |||||||||
159 | /// UnivariateLinearPolyBase is a base class for ElementCount and TypeSize. | ||||||||
160 | /// Like LinearPolyBase it tries to represent a linear polynomial | ||||||||
161 | /// where only one dimension can be set at any time, e.g. | ||||||||
162 | /// 0 * scale0 + 0 * scale1 + ... + cJ * scaleJ + ... + 0 * scaleK | ||||||||
163 | /// The dimension that is set is the univariate dimension. | ||||||||
164 | template <typename LeafTy> | ||||||||
165 | class UnivariateLinearPolyBase { | ||||||||
166 | public: | ||||||||
167 | using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; | ||||||||
168 | static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; | ||||||||
169 | static_assert(Dimensions != std::numeric_limits<unsigned>::max(), | ||||||||
170 | "Dimensions out of range"); | ||||||||
171 | |||||||||
172 | protected: | ||||||||
173 | ScalarTy Value; // The value at the univariate dimension. | ||||||||
174 | unsigned UnivariateDim; // The univariate dimension. | ||||||||
175 | |||||||||
176 | UnivariateLinearPolyBase(ScalarTy Val, unsigned UnivariateDim) | ||||||||
177 | : Value(Val), UnivariateDim(UnivariateDim) { | ||||||||
178 | assert(UnivariateDim < Dimensions && "Dimension out of range")((void)0); | ||||||||
179 | } | ||||||||
180 | |||||||||
181 | friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
182 | assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")((void)0); | ||||||||
183 | LHS.Value += RHS.Value; | ||||||||
184 | return LHS; | ||||||||
185 | } | ||||||||
186 | |||||||||
187 | friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
188 | assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")((void)0); | ||||||||
189 | LHS.Value -= RHS.Value; | ||||||||
190 | return LHS; | ||||||||
191 | } | ||||||||
192 | |||||||||
193 | friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { | ||||||||
194 | LHS.Value *= RHS; | ||||||||
195 | return LHS; | ||||||||
196 | } | ||||||||
197 | |||||||||
198 | friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
199 | LeafTy Copy = LHS; | ||||||||
200 | return Copy += RHS; | ||||||||
201 | } | ||||||||
202 | |||||||||
203 | friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
204 | LeafTy Copy = LHS; | ||||||||
205 | return Copy -= RHS; | ||||||||
206 | } | ||||||||
207 | |||||||||
208 | friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { | ||||||||
209 | LeafTy Copy = LHS; | ||||||||
210 | return Copy *= RHS; | ||||||||
211 | } | ||||||||
212 | |||||||||
213 | template <typename U = ScalarTy> | ||||||||
214 | friend typename std::enable_if<std::is_signed<U>::value, LeafTy>::type | ||||||||
215 | operator-(const LeafTy &LHS) { | ||||||||
216 | LeafTy Copy = LHS; | ||||||||
217 | return Copy *= -1; | ||||||||
218 | } | ||||||||
219 | |||||||||
220 | public: | ||||||||
221 | bool operator==(const UnivariateLinearPolyBase &RHS) const { | ||||||||
222 | return Value == RHS.Value && UnivariateDim == RHS.UnivariateDim; | ||||||||
223 | } | ||||||||
224 | |||||||||
225 | bool operator!=(const UnivariateLinearPolyBase &RHS) const { | ||||||||
226 | return !(*this == RHS); | ||||||||
227 | } | ||||||||
228 | |||||||||
229 | bool isZero() const { return !Value; } | ||||||||
230 | bool isNonZero() const { return !isZero(); } | ||||||||
231 | explicit operator bool() const { return isNonZero(); } | ||||||||
232 | ScalarTy getValue() const { return Value; } | ||||||||
233 | ScalarTy getValue(unsigned Dim) const { | ||||||||
234 | return Dim == UnivariateDim ? Value : 0; | ||||||||
235 | } | ||||||||
236 | |||||||||
237 | /// Add \p RHS to the value at the univariate dimension. | ||||||||
238 | LeafTy getWithIncrement(ScalarTy RHS) const { | ||||||||
239 | return static_cast<LeafTy>( | ||||||||
240 | UnivariateLinearPolyBase(Value + RHS, UnivariateDim)); | ||||||||
241 | } | ||||||||
242 | |||||||||
243 | /// Subtract \p RHS from the value at the univariate dimension. | ||||||||
244 | LeafTy getWithDecrement(ScalarTy RHS) const { | ||||||||
245 | return static_cast<LeafTy>( | ||||||||
246 | UnivariateLinearPolyBase(Value - RHS, UnivariateDim)); | ||||||||
247 | } | ||||||||
248 | }; | ||||||||
249 | |||||||||
250 | |||||||||
251 | //===----------------------------------------------------------------------===// | ||||||||
252 | // LinearPolySize - base class for fixed- or scalable sizes. | ||||||||
253 | // ^ ^ | ||||||||
254 | // | | | ||||||||
255 | // | +----- ElementCount - Leaf class to represent an element count | ||||||||
256 | // | (vscale x unsigned) | ||||||||
257 | // | | ||||||||
258 | // +-------- TypeSize - Leaf class to represent a type size | ||||||||
259 | // (vscale x uint64_t) | ||||||||
260 | //===----------------------------------------------------------------------===// | ||||||||
261 | |||||||||
262 | /// LinearPolySize is a base class to represent sizes. It is either | ||||||||
263 | /// fixed-sized or it is scalable-sized, but it cannot be both. | ||||||||
264 | template <typename LeafTy> | ||||||||
265 | class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> { | ||||||||
266 | // Make the parent class a friend, so that it can access the protected | ||||||||
267 | // conversion/copy-constructor for UnivariateLinearPolyBase<LeafTy> -> | ||||||||
268 | // LinearPolySize<LeafTy>. | ||||||||
269 | friend class UnivariateLinearPolyBase<LeafTy>; | ||||||||
270 | |||||||||
271 | public: | ||||||||
272 | using ScalarTy = typename UnivariateLinearPolyBase<LeafTy>::ScalarTy; | ||||||||
273 | enum Dims : unsigned { FixedDim = 0, ScalableDim = 1 }; | ||||||||
274 | |||||||||
275 | protected: | ||||||||
276 | LinearPolySize(ScalarTy MinVal, Dims D) | ||||||||
277 | : UnivariateLinearPolyBase<LeafTy>(MinVal, D) {} | ||||||||
278 | |||||||||
279 | LinearPolySize(const UnivariateLinearPolyBase<LeafTy> &V) | ||||||||
280 | : UnivariateLinearPolyBase<LeafTy>(V) {} | ||||||||
281 | |||||||||
282 | public: | ||||||||
283 | |||||||||
284 | static LeafTy getFixed(ScalarTy MinVal) { | ||||||||
285 | return static_cast<LeafTy>(LinearPolySize(MinVal, FixedDim)); | ||||||||
286 | } | ||||||||
287 | static LeafTy getScalable(ScalarTy MinVal) { | ||||||||
288 | return static_cast<LeafTy>(LinearPolySize(MinVal, ScalableDim)); | ||||||||
289 | } | ||||||||
290 | static LeafTy get(ScalarTy MinVal, bool Scalable) { | ||||||||
291 | return static_cast<LeafTy>( | ||||||||
292 | LinearPolySize(MinVal, Scalable ? ScalableDim : FixedDim)); | ||||||||
293 | } | ||||||||
294 | static LeafTy getNull() { return get(0, false); } | ||||||||
295 | |||||||||
296 | /// Returns the minimum value this size can represent. | ||||||||
297 | ScalarTy getKnownMinValue() const { return this->getValue(); } | ||||||||
298 | /// Returns whether the size is scaled by a runtime quantity (vscale). | ||||||||
299 | bool isScalable() const { return this->UnivariateDim == ScalableDim; } | ||||||||
300 | /// A return value of true indicates we know at compile time that the number | ||||||||
301 | /// of elements (vscale * Min) is definitely even. However, returning false | ||||||||
302 | /// does not guarantee that the total number of elements is odd. | ||||||||
303 | bool isKnownEven() const { return (getKnownMinValue() & 0x1) == 0; } | ||||||||
304 | /// This function tells the caller whether the element count is known at | ||||||||
305 | /// compile time to be a multiple of the scalar value RHS. | ||||||||
306 | bool isKnownMultipleOf(ScalarTy RHS) const { | ||||||||
307 | return getKnownMinValue() % RHS == 0; | ||||||||
308 | } | ||||||||
309 | |||||||||
310 | // Return the minimum value with the assumption that the count is exact. | ||||||||
311 | // Use in places where a scalable count doesn't make sense (e.g. non-vector | ||||||||
312 | // types, or vectors in backends which don't support scalable vectors). | ||||||||
313 | ScalarTy getFixedValue() const { | ||||||||
314 | assert(!isScalable() &&((void)0) | ||||||||
315 | "Request for a fixed element count on a scalable object")((void)0); | ||||||||
316 | return getKnownMinValue(); | ||||||||
317 | } | ||||||||
318 | |||||||||
319 | // For some cases, size ordering between scalable and fixed size types cannot | ||||||||
320 | // be determined at compile time, so such comparisons aren't allowed. | ||||||||
321 | // | ||||||||
322 | // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime | ||||||||
323 | // vscale >= 5, equal sized with a vscale of 4, and smaller with | ||||||||
324 | // a vscale <= 3. | ||||||||
325 | // | ||||||||
326 | // All the functions below make use of the fact vscale is always >= 1, which | ||||||||
327 | // means that <vscale x 4 x i32> is guaranteed to be >= <4 x i32>, etc. | ||||||||
328 | |||||||||
329 | static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
330 | if (!LHS.isScalable() || RHS.isScalable()) | ||||||||
331 | return LHS.getKnownMinValue() < RHS.getKnownMinValue(); | ||||||||
332 | return false; | ||||||||
333 | } | ||||||||
334 | |||||||||
335 | static bool isKnownGT(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
336 | if (LHS.isScalable() || !RHS.isScalable()) | ||||||||
337 | return LHS.getKnownMinValue() > RHS.getKnownMinValue(); | ||||||||
338 | return false; | ||||||||
339 | } | ||||||||
340 | |||||||||
341 | static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
342 | if (!LHS.isScalable() || RHS.isScalable()) | ||||||||
343 | return LHS.getKnownMinValue() <= RHS.getKnownMinValue(); | ||||||||
344 | return false; | ||||||||
345 | } | ||||||||
346 | |||||||||
347 | static bool isKnownGE(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
348 | if (LHS.isScalable() || !RHS.isScalable()) | ||||||||
349 | return LHS.getKnownMinValue() >= RHS.getKnownMinValue(); | ||||||||
350 | return false; | ||||||||
351 | } | ||||||||
352 | |||||||||
353 | /// We do not provide the '/' operator here because division for polynomial | ||||||||
354 | /// types does not work in the same way as for normal integer types. We can | ||||||||
355 | /// only divide the minimum value (or coefficient) by RHS, which is not the | ||||||||
356 | /// same as | ||||||||
357 | /// (Min * Vscale) / RHS | ||||||||
358 | /// The caller is recommended to use this function in combination with | ||||||||
359 | /// isKnownMultipleOf(RHS), which lets the caller know if it's possible to | ||||||||
360 | /// perform a lossless divide by RHS. | ||||||||
361 | LeafTy divideCoefficientBy(ScalarTy RHS) const { | ||||||||
362 | return static_cast<LeafTy>( | ||||||||
363 | LinearPolySize::get(getKnownMinValue() / RHS, isScalable())); | ||||||||
364 | } | ||||||||
365 | |||||||||
366 | LeafTy coefficientNextPowerOf2() const { | ||||||||
367 | return static_cast<LeafTy>(LinearPolySize::get( | ||||||||
368 | static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())), | ||||||||
369 | isScalable())); | ||||||||
370 | } | ||||||||
371 | |||||||||
372 | /// Printing function. | ||||||||
373 | void print(raw_ostream &OS) const { | ||||||||
374 | if (isScalable()) | ||||||||
375 | OS << "vscale x "; | ||||||||
376 | OS << getKnownMinValue(); | ||||||||
377 | } | ||||||||
378 | }; | ||||||||
379 | |||||||||
380 | class ElementCount; | ||||||||
381 | template <> struct LinearPolyBaseTypeTraits<ElementCount> { | ||||||||
382 | using ScalarTy = unsigned; | ||||||||
383 | static constexpr unsigned Dimensions = 2; | ||||||||
384 | }; | ||||||||
385 | |||||||||
386 | class ElementCount : public LinearPolySize<ElementCount> { | ||||||||
387 | public: | ||||||||
388 | ElementCount() : LinearPolySize(LinearPolySize::getNull()) {} | ||||||||
389 | |||||||||
390 | ElementCount(const LinearPolySize<ElementCount> &V) : LinearPolySize(V) {} | ||||||||
391 | |||||||||
392 | /// Counting predicates. | ||||||||
393 | /// | ||||||||
394 | ///@{ Number of elements. | ||||||||
395 | /// Exactly one element. | ||||||||
396 | bool isScalar() const { return !isScalable() && getKnownMinValue() == 1; } | ||||||||
397 | /// One or more elements. | ||||||||
398 | bool isVector() const { | ||||||||
399 | return (isScalable() && getKnownMinValue() != 0) || getKnownMinValue() > 1; | ||||||||
400 | } | ||||||||
401 | ///@} | ||||||||
402 | }; | ||||||||
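A short sketch of the counting predicates and the isKnownMultipleOf/divideCoefficientBy pairing recommended above; the function name is an assumption for the example:

void elementCountSketch() {
  llvm::ElementCount One = llvm::ElementCount::getFixed(1);
  llvm::ElementCount Scal8 = llvm::ElementCount::getScalable(8);
  bool A = One.isScalar();   // true: exactly one element
  bool B = Scal8.isVector(); // true: at least vscale * 8 elements
  // Halving is lossless only when the known minimum is a multiple of 2.
  if (Scal8.isKnownMultipleOf(2)) {
    llvm::ElementCount Half = Scal8.divideCoefficientBy(2); // vscale x 4
    (void)Half;
  }
  (void)A; (void)B;
}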
403 | |||||||||
404 | // This class is used to represent the size of types. If the type is of fixed size, it represents the exact size; for a scalable vector, the known minimum size. | ||||||||
405 | class TypeSize; | ||||||||
406 | template <> struct LinearPolyBaseTypeTraits<TypeSize> { | ||||||||
407 | using ScalarTy = uint64_t; | ||||||||
408 | static constexpr unsigned Dimensions = 2; | ||||||||
409 | }; | ||||||||
410 | |||||||||
411 | // TODO: Most functionality in this class will gradually be phased out | ||||||||
412 | // so it will resemble LinearPolySize as much as possible. | ||||||||
413 | // | ||||||||
414 | // TypeSize is used to represent the size of types. If the type is of fixed | ||||||||
415 | // size, it will represent the exact size. If the type is a scalable vector, | ||||||||
416 | // it will represent the known minimum size. | ||||||||
417 | class TypeSize : public LinearPolySize<TypeSize> { | ||||||||
418 | public: | ||||||||
419 | TypeSize(const LinearPolySize<TypeSize> &V) : LinearPolySize(V) {} | ||||||||
420 | TypeSize(ScalarTy MinVal, bool IsScalable) | ||||||||
421 | : LinearPolySize(LinearPolySize::get(MinVal, IsScalable)) {} | ||||||||
422 | |||||||||
423 | static TypeSize Fixed(ScalarTy MinVal) { return TypeSize(MinVal, false); } | ||||||||
424 | static TypeSize Scalable(ScalarTy MinVal) { return TypeSize(MinVal, true); } | ||||||||
425 | |||||||||
426 | ScalarTy getFixedSize() const { return getFixedValue(); } | ||||||||
427 | ScalarTy getKnownMinSize() const { return getKnownMinValue(); } | ||||||||
428 | |||||||||
429 | // All code for this class below this point is needed because of the | ||||||||
430 | // temporary implicit conversion to uint64_t. The operator overloads are | ||||||||
431 | // needed because otherwise the conversion of the parent class | ||||||||
432 | // UnivariateLinearPolyBase -> TypeSize is ambiguous. | ||||||||
433 | // TODO: Remove the implicit conversion. | ||||||||
434 | |||||||||
435 | // Casts to a uint64_t if this is a fixed-width size. | ||||||||
436 | // | ||||||||
437 | // This interface is deprecated and will be removed in a future version | ||||||||
438 | // of LLVM in favour of upgrading uses that rely on this implicit conversion | ||||||||
439 | // to uint64_t. Calls to functions that return a TypeSize should use the | ||||||||
440 | // proper interfaces to TypeSize. | ||||||||
441 | // In practice this is mostly calls to MVT/EVT::getSizeInBits(). | ||||||||
442 | // | ||||||||
443 | // To determine how to upgrade the code: | ||||||||
444 | // | ||||||||
445 | // if (<algorithm works for both scalable and fixed-width vectors>) | ||||||||
446 | // use getKnownMinValue() | ||||||||
447 | // else if (<algorithm works only for fixed-width vectors>) { | ||||||||
448 | // if <algorithm can be adapted for both scalable and fixed-width vectors> | ||||||||
449 | // update the algorithm and use getKnownMinValue() | ||||||||
450 | // else | ||||||||
451 | // bail out early for scalable vectors and use getFixedValue() | ||||||||
452 | // } | ||||||||
453 | operator ScalarTy() const; | ||||||||
454 | |||||||||
455 | // Additional operators needed to avoid ambiguous parses | ||||||||
456 | // because of the implicit conversion hack. | ||||||||
457 | friend TypeSize operator*(const TypeSize &LHS, const int RHS) { | ||||||||
458 | return LHS * (ScalarTy)RHS; | ||||||||
459 | } | ||||||||
460 | friend TypeSize operator*(const TypeSize &LHS, const unsigned RHS) { | ||||||||
461 | return LHS * (ScalarTy)RHS; | ||||||||
462 | } | ||||||||
463 | friend TypeSize operator*(const TypeSize &LHS, const int64_t RHS) { | ||||||||
464 | return LHS * (ScalarTy)RHS; | ||||||||
465 | } | ||||||||
466 | friend TypeSize operator*(const int LHS, const TypeSize &RHS) { | ||||||||
467 | return RHS * LHS; | ||||||||
468 | } | ||||||||
469 | friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { | ||||||||
470 | return RHS * LHS; | ||||||||
471 | } | ||||||||
472 | friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { | ||||||||
473 | return RHS * LHS; | ||||||||
474 | } | ||||||||
475 | friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { | ||||||||
476 | return RHS * LHS; | ||||||||
477 | } | ||||||||
478 | }; | ||||||||
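A sketch of the upgrade guidance in the comment above: prefer explicit queries over the deprecated implicit uint64_t conversion, bailing out early for scalable sizes. The helper names are assumptions for the example:

uint64_t exactSizeOrZero(llvm::TypeSize TS) {
  // Bail out for scalable sizes; only then is the exact fixed value meaningful.
  if (TS.isScalable())
    return 0;
  return TS.getFixedValue();
}

void typeSizeSketch() {
  llvm::TypeSize F = llvm::TypeSize::Fixed(128);
  llvm::TypeSize S = llvm::TypeSize::Scalable(128);
  // Comparisons rely on vscale >= 1, so this ordering is known to hold.
  bool Known = llvm::TypeSize::isKnownLE(F, S); // true
  (void)Known;
  (void)exactSizeOrZero(F); // 128
}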
479 | |||||||||
480 | //===----------------------------------------------------------------------===// | ||||||||
481 | // Utilities | ||||||||
482 | //===----------------------------------------------------------------------===// | ||||||||
483 | |||||||||
484 | /// Returns a TypeSize with a known minimum size that is the next integer | ||||||||
485 | /// (mod 2**64) that is greater than or equal to \p Value and is a multiple | ||||||||
486 | /// of \p Align. \p Align must be non-zero. | ||||||||
487 | /// | ||||||||
488 | /// Similar to the alignTo functions in MathExtras.h | ||||||||
489 | inline TypeSize alignTo(TypeSize Size, uint64_t Align) { | ||||||||
490 | assert(Align != 0u && "Align must be non-zero")((void)0); | ||||||||
491 | return {(Size.getKnownMinValue() + Align - 1) / Align * Align, | ||||||||
492 | Size.isScalable()}; | ||||||||
493 | } | ||||||||
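A quick sketch of alignTo on both kinds of sizes: rounding is applied to the known minimum value and scalability is preserved. The function name is an assumption for the example:

void alignToSketch() {
  llvm::TypeSize A = llvm::alignTo(llvm::TypeSize::Fixed(20), 16);    // 32
  llvm::TypeSize B = llvm::alignTo(llvm::TypeSize::Scalable(20), 16); // vscale x 32
  (void)A; (void)B;
}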
494 | |||||||||
495 | /// Stream operator function for `LinearPolySize`. | ||||||||
496 | template <typename LeafTy> | ||||||||
497 | inline raw_ostream &operator<<(raw_ostream &OS, | ||||||||
498 | const LinearPolySize<LeafTy> &PS) { | ||||||||
499 | PS.print(OS); | ||||||||
500 | return OS; | ||||||||
501 | } | ||||||||
502 | |||||||||
503 | template <typename T> struct DenseMapInfo; | ||||||||
504 | template <> struct DenseMapInfo<ElementCount> { | ||||||||
505 | static inline ElementCount getEmptyKey() { | ||||||||
506 | return ElementCount::getScalable(~0U); | ||||||||
507 | } | ||||||||
508 | static inline ElementCount getTombstoneKey() { | ||||||||
509 | return ElementCount::getFixed(~0U - 1); | ||||||||
510 | } | ||||||||
511 | static unsigned getHashValue(const ElementCount &EltCnt) { | ||||||||
512 | unsigned HashVal = EltCnt.getKnownMinValue() * 37U; | ||||||||
513 | if (EltCnt.isScalable()) | ||||||||
514 | return (HashVal - 1U); | ||||||||
515 | |||||||||
516 | return HashVal; | ||||||||
517 | } | ||||||||
518 | |||||||||
519 | static bool isEqual(const ElementCount &LHS, const ElementCount &RHS) { | ||||||||
520 | return LHS == RHS; | ||||||||
521 | } | ||||||||
522 | }; | ||||||||
523 | |||||||||
524 | } // end namespace llvm | ||||||||
525 | |||||||||
526 | #endif // LLVM_SUPPORT_TYPESIZE_H |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides a simple and efficient mechanism for performing general |
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is |
11 | // that it allows you to write concise patterns that are expressive and easy to |
12 | // understand. The other major advantage of this is that it allows you to |
13 | // trivially capture/bind elements in the pattern to variables. For example, |
14 | // you can do something like this: |
15 | // |
16 | // Value *Exp = ... |
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) |
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), |
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { |
20 | // ... Pattern is matched and variables are bound ... |
21 | // } |
22 | // |
23 | // This is primarily useful to things like the instruction combiner, but can |
24 | // also be useful for static analysis tools or code generators. |
25 | // |
26 | //===----------------------------------------------------------------------===// |
27 | |
28 | #ifndef LLVM_IR_PATTERNMATCH_H |
29 | #define LLVM_IR_PATTERNMATCH_H |
30 | |
31 | #include "llvm/ADT/APFloat.h" |
32 | #include "llvm/ADT/APInt.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/InstrTypes.h" |
37 | #include "llvm/IR/Instruction.h" |
38 | #include "llvm/IR/Instructions.h" |
39 | #include "llvm/IR/IntrinsicInst.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Value.h" |
43 | #include "llvm/Support/Casting.h" |
44 | #include <cstdint> |
45 | |
46 | namespace llvm { |
47 | namespace PatternMatch { |
48 | |
49 | template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) { |
50 | return const_cast<Pattern &>(P).match(V); |
51 | } |
52 | |
53 | template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) { |
54 | return const_cast<Pattern &>(P).match(Mask); |
55 | } |
56 | |
57 | template <typename SubPattern_t> struct OneUse_match { |
58 | SubPattern_t SubPattern; |
59 | |
60 | OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} |
61 | |
62 | template <typename OpTy> bool match(OpTy *V) { |
63 | return V->hasOneUse() && SubPattern.match(V); |
64 | } |
65 | }; |
66 | |
67 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { |
68 | return SubPattern; |
69 | } |
70 | |
71 | template <typename Class> struct class_match { |
72 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } |
73 | }; |
74 | |
75 | /// Match an arbitrary value and ignore it. |
76 | inline class_match<Value> m_Value() { return class_match<Value>(); } |
77 | |
78 | /// Match an arbitrary unary operation and ignore it. |
79 | inline class_match<UnaryOperator> m_UnOp() { |
80 | return class_match<UnaryOperator>(); |
81 | } |
82 | |
83 | /// Match an arbitrary binary operation and ignore it. |
84 | inline class_match<BinaryOperator> m_BinOp() { |
85 | return class_match<BinaryOperator>(); |
86 | } |
87 | |
89 | /// Match any compare instruction and ignore it. |
89 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } |
90 | |
91 | struct undef_match { |
92 | static bool check(const Value *V) { |
93 | if (isa<UndefValue>(V)) |
94 | return true; |
95 | |
96 | const auto *CA = dyn_cast<ConstantAggregate>(V); |
97 | if (!CA) |
98 | return false; |
99 | |
100 | SmallPtrSet<const ConstantAggregate *, 8> Seen; |
101 | SmallVector<const ConstantAggregate *, 8> Worklist; |
102 | |
103 | // Either UndefValue, PoisonValue, or an aggregate that only contains |
104 | // these is accepted by matcher. |
105 | // CheckValue returns false if CA cannot satisfy this constraint. |
106 | auto CheckValue = [&](const ConstantAggregate *CA) { |
107 | for (const Value *Op : CA->operand_values()) { |
108 | if (isa<UndefValue>(Op)) |
109 | continue; |
110 | |
111 | const auto *CA = dyn_cast<ConstantAggregate>(Op); |
112 | if (!CA) |
113 | return false; |
114 | if (Seen.insert(CA).second) |
115 | Worklist.emplace_back(CA); |
116 | } |
117 | |
118 | return true; |
119 | }; |
120 | |
121 | if (!CheckValue(CA)) |
122 | return false; |
123 | |
124 | while (!Worklist.empty()) { |
125 | if (!CheckValue(Worklist.pop_back_val())) |
126 | return false; |
127 | } |
128 | return true; |
129 | } |
130 | template <typename ITy> bool match(ITy *V) { return check(V); } |
131 | }; |
132 | |
133 | /// Match an arbitrary undef constant. This matches poison as well. |
134 | /// If this is an aggregate and contains a non-aggregate element that is |
135 | /// neither undef nor poison, the aggregate is not matched. |
136 | inline auto m_Undef() { return undef_match(); } |
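An illustrative sketch of the aggregate case handled by undef_match above; it assumes an LLVMContext Ctx and the usual IR headers, and the function name is made up for the example:

bool undefAggregateSketch(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  using namespace llvm::PatternMatch;
  Type *I32 = Type::getInt32Ty(Ctx);
  StructType *STy = StructType::get(I32, I32);
  // Every leaf is undef or poison, so the whole aggregate matches m_Undef().
  Constant *Agg = ConstantStruct::get(
      STy, {UndefValue::get(I32), PoisonValue::get(I32)});
  return match(Agg, m_Undef()); // true
}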
137 | |
138 | /// Match an arbitrary poison constant. |
139 | inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); } |
140 | |
141 | /// Match an arbitrary Constant and ignore it. |
142 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } |
143 | |
144 | /// Match an arbitrary ConstantInt and ignore it. |
145 | inline class_match<ConstantInt> m_ConstantInt() { |
146 | return class_match<ConstantInt>(); |
147 | } |
148 | |
149 | /// Match an arbitrary ConstantFP and ignore it. |
150 | inline class_match<ConstantFP> m_ConstantFP() { |
151 | return class_match<ConstantFP>(); |
152 | } |
153 | |
154 | /// Match an arbitrary ConstantExpr and ignore it. |
155 | inline class_match<ConstantExpr> m_ConstantExpr() { |
156 | return class_match<ConstantExpr>(); |
157 | } |
158 | |
159 | /// Match an arbitrary basic block value and ignore it. |
160 | inline class_match<BasicBlock> m_BasicBlock() { |
161 | return class_match<BasicBlock>(); |
162 | } |
163 | |
164 | /// Inverting matcher |
165 | template <typename Ty> struct match_unless { |
166 | Ty M; |
167 | |
168 | match_unless(const Ty &Matcher) : M(Matcher) {} |
169 | |
170 | template <typename ITy> bool match(ITy *V) { return !M.match(V); } |
171 | }; |
172 | |
173 | /// Match if the inner matcher does *NOT* match. |
174 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { |
175 | return match_unless<Ty>(M); |
176 | } |
177 | |
178 | /// Matching combinators |
179 | template <typename LTy, typename RTy> struct match_combine_or { |
180 | LTy L; |
181 | RTy R; |
182 | |
183 | match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
184 | |
185 | template <typename ITy> bool match(ITy *V) { |
186 | if (L.match(V)) |
187 | return true; |
188 | if (R.match(V)) |
189 | return true; |
190 | return false; |
191 | } |
192 | }; |
193 | |
194 | template <typename LTy, typename RTy> struct match_combine_and { |
195 | LTy L; |
196 | RTy R; |
197 | |
198 | match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
199 | |
200 | template <typename ITy> bool match(ITy *V) { |
201 | if (L.match(V)) |
202 | if (R.match(V)) |
203 | return true; |
204 | return false; |
205 | } |
206 | }; |
207 | |
208 | /// Combine two pattern matchers matching L || R |
209 | template <typename LTy, typename RTy> |
210 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { |
211 | return match_combine_or<LTy, RTy>(L, R); |
212 | } |
213 | |
214 | /// Combine two pattern matchers matching L && R |
215 | template <typename LTy, typename RTy> |
216 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { |
217 | return match_combine_and<LTy, RTy>(L, R); |
218 | } |
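Two small sketches of the combinators; the function names and the value V are assumptions for the example:

bool constantButNotUndef(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // A Constant that is neither undef nor poison.
  return match(V, m_CombineAnd(m_Constant(), m_Unless(m_Undef())));
}

bool intOrFPConstant(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // Either an integer constant or a floating-point constant.
  return match(V, m_CombineOr(m_ConstantInt(), m_ConstantFP()));
}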
219 | |
220 | struct apint_match { |
221 | const APInt *&Res; |
222 | bool AllowUndef; |
223 | |
224 | apint_match(const APInt *&Res, bool AllowUndef) |
225 | : Res(Res), AllowUndef(AllowUndef) {} |
226 | |
227 | template <typename ITy> bool match(ITy *V) { |
228 | if (auto *CI = dyn_cast<ConstantInt>(V)) { |
229 | Res = &CI->getValue(); |
230 | return true; |
231 | } |
232 | if (V->getType()->isVectorTy()) |
233 | if (const auto *C = dyn_cast<Constant>(V)) |
234 | if (auto *CI = dyn_cast_or_null<ConstantInt>( |
235 | C->getSplatValue(AllowUndef))) { |
236 | Res = &CI->getValue(); |
237 | return true; |
238 | } |
239 | return false; |
240 | } |
241 | }; |
242 | // Either constexpr if or renaming ConstantFP::getValueAPF to |
243 | // ConstantFP::getValue is needed to do it via a single template |
244 | // function for both apint/apfloat. |
245 | struct apfloat_match { |
246 | const APFloat *&Res; |
247 | bool AllowUndef; |
248 | |
249 | apfloat_match(const APFloat *&Res, bool AllowUndef) |
250 | : Res(Res), AllowUndef(AllowUndef) {} |
251 | |
252 | template <typename ITy> bool match(ITy *V) { |
253 | if (auto *CI = dyn_cast<ConstantFP>(V)) { |
254 | Res = &CI->getValueAPF(); |
255 | return true; |
256 | } |
257 | if (V->getType()->isVectorTy()) |
258 | if (const auto *C = dyn_cast<Constant>(V)) |
259 | if (auto *CI = dyn_cast_or_null<ConstantFP>( |
260 | C->getSplatValue(AllowUndef))) { |
261 | Res = &CI->getValueAPF(); |
262 | return true; |
263 | } |
264 | return false; |
265 | } |
266 | }; |
267 | |
268 | /// Match a ConstantInt or splatted ConstantVector, binding the |
269 | /// specified pointer to the contained APInt. |
270 | inline apint_match m_APInt(const APInt *&Res) { |
271 | // Forbid undefs by default to maintain previous behavior. |
272 | return apint_match(Res, /* AllowUndef */ false); |
273 | } |
274 | |
275 | /// Match APInt while allowing undefs in splat vector constants. |
276 | inline apint_match m_APIntAllowUndef(const APInt *&Res) { |
277 | return apint_match(Res, /* AllowUndef */ true); |
278 | } |
279 | |
280 | /// Match APInt while forbidding undefs in splat vector constants. |
281 | inline apint_match m_APIntForbidUndef(const APInt *&Res) { |
282 | return apint_match(Res, /* AllowUndef */ false); |
283 | } |
284 | |
285 | /// Match a ConstantFP or splatted ConstantVector, binding the |
286 | /// specified pointer to the contained APFloat. |
287 | inline apfloat_match m_APFloat(const APFloat *&Res) { |
288 | // Forbid undefs by default to maintain previous behavior. |
289 | return apfloat_match(Res, /* AllowUndef */ false); |
290 | } |
291 | |
292 | /// Match APFloat while allowing undefs in splat vector constants. |
293 | inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) { |
294 | return apfloat_match(Res, /* AllowUndef */ true); |
295 | } |
296 | |
297 | /// Match APFloat while forbidding undefs in splat vector constants. |
298 | inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) { |
299 | return apfloat_match(Res, /* AllowUndef */ false); |
300 | } |
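A sketch of binding through m_APInt, which covers both a scalar ConstantInt and a splatted vector constant; the function name and the value V are assumptions for the example:

bool isPowerOfTwoConstant(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  const llvm::APInt *C;
  // Undef lanes in a splat are rejected here; m_APIntAllowUndef would accept them.
  if (match(V, m_APInt(C)))
    return C->isPowerOf2();
  return false;
}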
301 | |
302 | template <int64_t Val> struct constantint_match { |
303 | template <typename ITy> bool match(ITy *V) { |
304 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { |
305 | const APInt &CIV = CI->getValue(); |
306 | if (Val >= 0) |
307 | return CIV == static_cast<uint64_t>(Val); |
308 | // If Val is negative, and CI is shorter than it, truncate to the right |
309 | // number of bits. If it is larger, then we have to sign extend. Just |
310 | // compare their negated values. |
311 | return -CIV == -Val; |
312 | } |
313 | return false; |
314 | } |
315 | }; |
316 | |
317 | /// Match a ConstantInt with a specific value. |
318 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { |
319 | return constantint_match<Val>(); |
320 | } |
321 | |
322 | /// This helper class is used to match constant scalars, vector splats, |
323 | /// and fixed width vectors that satisfy a specified predicate. |
324 | /// For fixed width vector constants, undefined elements are ignored. |
325 | template <typename Predicate, typename ConstantVal> |
326 | struct cstval_pred_ty : public Predicate { |
327 | template <typename ITy> bool match(ITy *V) { |
328 | if (const auto *CV = dyn_cast<ConstantVal>(V)) |
329 | return this->isValue(CV->getValue()); |
330 | if (const auto *VTy = dyn_cast<VectorType>(V->getType())) { |
331 | if (const auto *C = dyn_cast<Constant>(V)) { |
332 | if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue())) |
333 | return this->isValue(CV->getValue()); |
334 | |
335 | // Number of elements of a scalable vector unknown at compile time |
336 | auto *FVTy = dyn_cast<FixedVectorType>(VTy); |
337 | if (!FVTy) |
338 | return false; |
339 | |
340 | // Non-splat vector constant: check each element for a match. |
341 | unsigned NumElts = FVTy->getNumElements(); |
342 | assert(NumElts != 0 && "Constant vector with no elements?")((void)0); |
343 | bool HasNonUndefElements = false; |
344 | for (unsigned i = 0; i != NumElts; ++i) { |
345 | Constant *Elt = C->getAggregateElement(i); |
346 | if (!Elt) |
347 | return false; |
348 | if (isa<UndefValue>(Elt)) |
349 | continue; |
350 | auto *CV = dyn_cast<ConstantVal>(Elt); |
351 | if (!CV || !this->isValue(CV->getValue())) |
352 | return false; |
353 | HasNonUndefElements = true; |
354 | } |
355 | return HasNonUndefElements; |
356 | } |
357 | } |
358 | return false; |
359 | } |
360 | }; |
361 | |
362 | /// specialization of cstval_pred_ty for ConstantInt |
363 | template <typename Predicate> |
364 | using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>; |
365 | |
366 | /// specialization of cstval_pred_ty for ConstantFP |
367 | template <typename Predicate> |
368 | using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>; |
369 | |
370 | /// This helper class is used to match scalar and vector constants that |
371 | /// satisfy a specified predicate, and bind them to an APInt. |
372 | template <typename Predicate> struct api_pred_ty : public Predicate { |
373 | const APInt *&Res; |
374 | |
375 | api_pred_ty(const APInt *&R) : Res(R) {} |
376 | |
377 | template <typename ITy> bool match(ITy *V) { |
378 | if (const auto *CI = dyn_cast<ConstantInt>(V)) |
379 | if (this->isValue(CI->getValue())) { |
380 | Res = &CI->getValue(); |
381 | return true; |
382 | } |
383 | if (V->getType()->isVectorTy()) |
384 | if (const auto *C = dyn_cast<Constant>(V)) |
385 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) |
386 | if (this->isValue(CI->getValue())) { |
387 | Res = &CI->getValue(); |
388 | return true; |
389 | } |
390 | |
391 | return false; |
392 | } |
393 | }; |
394 | |
395 | /// This helper class is used to match scalar and vector constants that |
396 | /// satisfy a specified predicate, and bind them to an APFloat. |
397 | /// Undefs are allowed in splat vector constants. |
398 | template <typename Predicate> struct apf_pred_ty : public Predicate { |
399 | const APFloat *&Res; |
400 | |
401 | apf_pred_ty(const APFloat *&R) : Res(R) {} |
402 | |
403 | template <typename ITy> bool match(ITy *V) { |
404 | if (const auto *CI = dyn_cast<ConstantFP>(V)) |
405 | if (this->isValue(CI->getValue())) { |
406 | Res = &CI->getValue(); |
407 | return true; |
408 | } |
409 | if (V->getType()->isVectorTy()) |
410 | if (const auto *C = dyn_cast<Constant>(V)) |
411 | if (auto *CI = dyn_cast_or_null<ConstantFP>( |
412 | C->getSplatValue(/* AllowUndef */ true))) |
413 | if (this->isValue(CI->getValue())) { |
414 | Res = &CI->getValue(); |
415 | return true; |
416 | } |
417 | |
418 | return false; |
419 | } |
420 | }; |
421 | |
422 | /////////////////////////////////////////////////////////////////////////////// |
423 | // |
424 | // Encapsulate constant value queries for use in templated predicate matchers. |
425 | // This allows checking if constants match using compound predicates and works |
426 | // with vector constants, possibly with relaxed constraints. For example, ignore |
427 | // undef values. |
428 | // |
429 | /////////////////////////////////////////////////////////////////////////////// |
430 | |
431 | struct is_any_apint { |
432 | bool isValue(const APInt &C) { return true; } |
433 | }; |
434 | /// Match an integer or vector with any integral constant. |
435 | /// For vectors, this includes constants with undefined elements. |
436 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { |
437 | return cst_pred_ty<is_any_apint>(); |
438 | } |
439 | |
440 | struct is_all_ones { |
441 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } |
442 | }; |
443 | /// Match an integer or vector with all bits set. |
444 | /// For vectors, this includes constants with undefined elements. |
445 | inline cst_pred_ty<is_all_ones> m_AllOnes() { |
446 | return cst_pred_ty<is_all_ones>(); |
447 | } |
448 | |
449 | struct is_maxsignedvalue { |
450 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } |
451 | }; |
452 | /// Match an integer or vector with values having all bits except for the high |
453 | /// bit set (0x7f...). |
454 | /// For vectors, this includes constants with undefined elements. |
455 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { |
456 | return cst_pred_ty<is_maxsignedvalue>(); |
457 | } |
458 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { |
459 | return V; |
460 | } |
461 | |
462 | struct is_negative { |
463 | bool isValue(const APInt &C) { return C.isNegative(); } |
464 | }; |
465 | /// Match an integer or vector of negative values. |
466 | /// For vectors, this includes constants with undefined elements. |
467 | inline cst_pred_ty<is_negative> m_Negative() { |
468 | return cst_pred_ty<is_negative>(); |
469 | } |
470 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { |
471 | return V; |
472 | } |
473 | |
474 | struct is_nonnegative { |
475 | bool isValue(const APInt &C) { return C.isNonNegative(); } |
476 | }; |
477 | /// Match an integer or vector of non-negative values. |
478 | /// For vectors, this includes constants with undefined elements. |
479 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { |
480 | return cst_pred_ty<is_nonnegative>(); |
481 | } |
482 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { |
483 | return V; |
484 | } |
485 | |
486 | struct is_strictlypositive { |
487 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } |
488 | }; |
489 | /// Match an integer or vector of strictly positive values. |
490 | /// For vectors, this includes constants with undefined elements. |
491 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { |
492 | return cst_pred_ty<is_strictlypositive>(); |
493 | } |
494 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { |
495 | return V; |
496 | } |
497 | |
498 | struct is_nonpositive { |
499 | bool isValue(const APInt &C) { return C.isNonPositive(); } |
500 | }; |
501 | /// Match an integer or vector of non-positive values. |
502 | /// For vectors, this includes constants with undefined elements. |
503 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { |
504 | return cst_pred_ty<is_nonpositive>(); |
505 | } |
506 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } |
507 | |
508 | struct is_one { |
509 | bool isValue(const APInt &C) { return C.isOneValue(); } |
510 | }; |
511 | /// Match an integer 1 or a vector with all elements equal to 1. |
512 | /// For vectors, this includes constants with undefined elements. |
513 | inline cst_pred_ty<is_one> m_One() { |
514 | return cst_pred_ty<is_one>(); |
515 | } |
516 | |
517 | struct is_zero_int { |
518 | bool isValue(const APInt &C) { return C.isNullValue(); } |
519 | }; |
520 | /// Match an integer 0 or a vector with all elements equal to 0. |
521 | /// For vectors, this includes constants with undefined elements. |
522 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { |
523 | return cst_pred_ty<is_zero_int>(); |
524 | } |
525 | |
526 | struct is_zero { |
527 | template <typename ITy> bool match(ITy *V) { |
528 | auto *C = dyn_cast<Constant>(V); |
529 | // FIXME: this should be able to do something for scalable vectors |
530 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); |
531 | } |
532 | }; |
533 | /// Match any null constant or a vector with all elements equal to 0. |
534 | /// For vectors, this includes constants with undefined elements. |
535 | inline is_zero m_Zero() { |
536 | return is_zero(); |
537 | } |
538 | |
539 | struct is_power2 { |
540 | bool isValue(const APInt &C) { return C.isPowerOf2(); } |
541 | }; |
542 | /// Match an integer or vector power-of-2. |
543 | /// For vectors, this includes constants with undefined elements. |
544 | inline cst_pred_ty<is_power2> m_Power2() { |
545 | return cst_pred_ty<is_power2>(); |
546 | } |
547 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { |
548 | return V; |
549 | } |
550 | |
551 | struct is_negated_power2 { |
552 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } |
553 | }; |
554 | /// Match an integer or vector negated power-of-2.
555 | /// For vectors, this includes constants with undefined elements. |
556 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { |
557 | return cst_pred_ty<is_negated_power2>(); |
558 | } |
559 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { |
560 | return V; |
561 | } |
562 | |
563 | struct is_power2_or_zero { |
564 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } |
565 | }; |
566 | /// Match an integer or vector of 0 or power-of-2 values. |
567 | /// For vectors, this includes constants with undefined elements. |
568 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { |
569 | return cst_pred_ty<is_power2_or_zero>(); |
570 | } |
571 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { |
572 | return V; |
573 | } |
574 | |
575 | struct is_sign_mask { |
576 | bool isValue(const APInt &C) { return C.isSignMask(); } |
577 | }; |
578 | /// Match an integer or vector with only the sign bit(s) set. |
579 | /// For vectors, this includes constants with undefined elements. |
580 | inline cst_pred_ty<is_sign_mask> m_SignMask() { |
581 | return cst_pred_ty<is_sign_mask>(); |
582 | } |
583 | |
584 | struct is_lowbit_mask { |
585 | bool isValue(const APInt &C) { return C.isMask(); } |
586 | }; |
587 | /// Match an integer or vector with only the low bit(s) set. |
588 | /// For vectors, this includes constants with undefined elements. |
589 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { |
590 | return cst_pred_ty<is_lowbit_mask>(); |
591 | } |
592 | |
593 | struct icmp_pred_with_threshold { |
594 | ICmpInst::Predicate Pred; |
595 | const APInt *Thr; |
596 | bool isValue(const APInt &C) { |
597 | switch (Pred) { |
598 | case ICmpInst::Predicate::ICMP_EQ: |
599 | return C.eq(*Thr); |
600 | case ICmpInst::Predicate::ICMP_NE: |
601 | return C.ne(*Thr); |
602 | case ICmpInst::Predicate::ICMP_UGT: |
603 | return C.ugt(*Thr); |
604 | case ICmpInst::Predicate::ICMP_UGE: |
605 | return C.uge(*Thr); |
606 | case ICmpInst::Predicate::ICMP_ULT: |
607 | return C.ult(*Thr); |
608 | case ICmpInst::Predicate::ICMP_ULE: |
609 | return C.ule(*Thr); |
610 | case ICmpInst::Predicate::ICMP_SGT: |
611 | return C.sgt(*Thr); |
612 | case ICmpInst::Predicate::ICMP_SGE: |
613 | return C.sge(*Thr); |
614 | case ICmpInst::Predicate::ICMP_SLT: |
615 | return C.slt(*Thr); |
616 | case ICmpInst::Predicate::ICMP_SLE: |
617 | return C.sle(*Thr); |
618 | default: |
619 | llvm_unreachable("Unhandled ICmp predicate");
620 | } |
621 | } |
622 | }; |
623 | /// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
624 | /// to Threshold. For vectors, this includes constants with undefined elements. |
625 | inline cst_pred_ty<icmp_pred_with_threshold> |
626 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { |
627 | cst_pred_ty<icmp_pred_with_threshold> P; |
628 | P.Pred = Predicate; |
629 | P.Thr = &Threshold; |
630 | return P; |
631 | } |
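// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_SpecificInt_ICMP above; the function name and the
// caller-provided Limit are hypothetical.
static bool isConstantBelow(llvm::Value *V, const llvm::APInt &Limit) {
  using namespace llvm::PatternMatch;
  // Matches a scalar constant or a splat vector constant (undef lanes
  // allowed) that is unsigned-less-than Limit. Limit must have the same
  // bit width as the matched constant.
  return match(V, m_SpecificInt_ICMP(llvm::ICmpInst::ICMP_ULT, Limit));
}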
632 | |
633 | struct is_nan { |
634 | bool isValue(const APFloat &C) { return C.isNaN(); } |
635 | }; |
636 | /// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
637 | /// For vectors, this includes constants with undefined elements. |
638 | inline cstfp_pred_ty<is_nan> m_NaN() { |
639 | return cstfp_pred_ty<is_nan>(); |
640 | } |
641 | |
642 | struct is_nonnan { |
643 | bool isValue(const APFloat &C) { return !C.isNaN(); } |
644 | }; |
645 | /// Match a non-NaN FP constant. |
646 | /// For vectors, this includes constants with undefined elements. |
647 | inline cstfp_pred_ty<is_nonnan> m_NonNaN() { |
648 | return cstfp_pred_ty<is_nonnan>(); |
649 | } |
650 | |
651 | struct is_inf { |
652 | bool isValue(const APFloat &C) { return C.isInfinity(); } |
653 | }; |
654 | /// Match a positive or negative infinity FP constant. |
655 | /// For vectors, this includes constants with undefined elements. |
656 | inline cstfp_pred_ty<is_inf> m_Inf() { |
657 | return cstfp_pred_ty<is_inf>(); |
658 | } |
659 | |
660 | struct is_noninf { |
661 | bool isValue(const APFloat &C) { return !C.isInfinity(); } |
662 | }; |
663 | /// Match a non-infinity FP constant, i.e. finite or NaN. |
664 | /// For vectors, this includes constants with undefined elements. |
665 | inline cstfp_pred_ty<is_noninf> m_NonInf() { |
666 | return cstfp_pred_ty<is_noninf>(); |
667 | } |
668 | |
669 | struct is_finite { |
670 | bool isValue(const APFloat &C) { return C.isFinite(); } |
671 | }; |
672 | /// Match a finite FP constant, i.e. not infinity or NaN. |
673 | /// For vectors, this includes constants with undefined elements. |
674 | inline cstfp_pred_ty<is_finite> m_Finite() { |
675 | return cstfp_pred_ty<is_finite>(); |
676 | } |
677 | inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; } |
678 | |
679 | struct is_finitenonzero { |
680 | bool isValue(const APFloat &C) { return C.isFiniteNonZero(); } |
681 | }; |
682 | /// Match a finite non-zero FP constant. |
683 | /// For vectors, this includes constants with undefined elements. |
684 | inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() { |
685 | return cstfp_pred_ty<is_finitenonzero>(); |
686 | } |
687 | inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) { |
688 | return V; |
689 | } |
690 | |
691 | struct is_any_zero_fp { |
692 | bool isValue(const APFloat &C) { return C.isZero(); } |
693 | }; |
694 | /// Match a floating-point negative zero or positive zero. |
695 | /// For vectors, this includes constants with undefined elements. |
696 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { |
697 | return cstfp_pred_ty<is_any_zero_fp>(); |
698 | } |
699 | |
700 | struct is_pos_zero_fp { |
701 | bool isValue(const APFloat &C) { return C.isPosZero(); } |
702 | }; |
703 | /// Match a floating-point positive zero. |
704 | /// For vectors, this includes constants with undefined elements. |
705 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { |
706 | return cstfp_pred_ty<is_pos_zero_fp>(); |
707 | } |
708 | |
709 | struct is_neg_zero_fp { |
710 | bool isValue(const APFloat &C) { return C.isNegZero(); } |
711 | }; |
712 | /// Match a floating-point negative zero. |
713 | /// For vectors, this includes constants with undefined elements. |
714 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { |
715 | return cstfp_pred_ty<is_neg_zero_fp>(); |
716 | } |
717 | |
718 | struct is_non_zero_fp { |
719 | bool isValue(const APFloat &C) { return C.isNonZero(); } |
720 | }; |
721 | /// Match a floating-point non-zero. |
722 | /// For vectors, this includes constants with undefined elements. |
723 | inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() { |
724 | return cstfp_pred_ty<is_non_zero_fp>(); |
725 | } |
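// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the FP constant predicates above; the function name is
// hypothetical.
static bool isNaNOrAnyZeroFP(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // True for a NaN constant or a +0.0 / -0.0 constant (scalar or vector,
  // including vectors with undef elements).
  return match(V, m_NaN()) || match(V, m_AnyZeroFP());
}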
726 | |
727 | /////////////////////////////////////////////////////////////////////////////// |
728 | |
729 | template <typename Class> struct bind_ty { |
730 | Class *&VR; |
731 | |
732 | bind_ty(Class *&V) : VR(V) {} |
733 | |
734 | template <typename ITy> bool match(ITy *V) { |
735 | if (auto *CV = dyn_cast<Class>(V)) { |
736 | VR = CV; |
737 | return true; |
738 | } |
739 | return false; |
740 | } |
741 | }; |
742 | |
743 | /// Match a value, capturing it if we match. |
744 | inline bind_ty<Value> m_Value(Value *&V) { return V; } |
745 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } |
746 | |
747 | /// Match an instruction, capturing it if we match. |
748 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } |
749 | /// Match a unary operator, capturing it if we match. |
750 | inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; } |
751 | /// Match a binary operator, capturing it if we match. |
752 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } |
753 | /// Match a with-overflow intrinsic, capturing it if we match.
754 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } |
755 | inline bind_ty<const WithOverflowInst> |
756 | m_WithOverflowInst(const WithOverflowInst *&I) { |
757 | return I; |
758 | } |
759 | |
760 | /// Match a Constant, capturing the value if we match. |
761 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } |
762 | |
763 | /// Match a ConstantInt, capturing the value if we match. |
764 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } |
765 | |
766 | /// Match a ConstantFP, capturing the value if we match. |
767 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } |
768 | |
769 | /// Match a ConstantExpr, capturing the value if we match. |
770 | inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; } |
771 | |
772 | /// Match a basic block value, capturing it if we match. |
773 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } |
774 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { |
775 | return V; |
776 | } |
777 | |
778 | /// Match an arbitrary immediate Constant and ignore it. |
779 | inline match_combine_and<class_match<Constant>, |
780 | match_unless<class_match<ConstantExpr>>> |
781 | m_ImmConstant() { |
782 | return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr())); |
783 | } |
784 | |
785 | /// Match an immediate Constant, capturing the value if we match. |
786 | inline match_combine_and<bind_ty<Constant>, |
787 | match_unless<class_match<ConstantExpr>>> |
788 | m_ImmConstant(Constant *&C) { |
789 | return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); |
790 | } |
791 | |
792 | /// Match a specified Value*. |
793 | struct specificval_ty { |
794 | const Value *Val; |
795 | |
796 | specificval_ty(const Value *V) : Val(V) {} |
797 | |
798 | template <typename ITy> bool match(ITy *V) { return V == Val; } |
799 | }; |
800 | |
801 | /// Match if we have a specific specified value. |
802 | inline specificval_ty m_Specific(const Value *V) { return V; } |
803 | |
804 | /// Stores a reference to the Value *, not the Value * itself, |
805 | /// thus can be used in commutative matchers. |
806 | template <typename Class> struct deferredval_ty { |
807 | Class *const &Val; |
808 | |
809 | deferredval_ty(Class *const &V) : Val(V) {} |
810 | |
811 | template <typename ITy> bool match(ITy *const V) { return V == Val; } |
812 | }; |
813 | |
814 | /// Like m_Specific(), but works if the specific value to match is determined |
815 | /// as part of the same match() expression. For example: |
816 | /// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will |
817 | /// bind X before the pattern match starts. |
818 | /// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against |
819 | /// whichever value m_Value(X) populated. |
820 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } |
821 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { |
822 | return V; |
823 | } |
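// --- Editor's illustrative sketch, not part of the original header. ---
// Example of the m_Value / m_Deferred pairing described above; the function
// name is hypothetical, and m_Add is defined further down in this header.
static bool isAddOfValueToItself(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *X = nullptr;
  // "add X, X": the first operand binds X, and m_Deferred re-checks the
  // second operand against whatever m_Value just captured.
  return match(V, m_Add(m_Value(X), m_Deferred(X)));
}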
824 | |
825 | /// Match a specified floating point value or a vector with all elements
826 | /// equal to that value.
827 | struct specific_fpval { |
828 | double Val; |
829 | |
830 | specific_fpval(double V) : Val(V) {} |
831 | |
832 | template <typename ITy> bool match(ITy *V) { |
833 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) |
834 | return CFP->isExactlyValue(Val); |
835 | if (V->getType()->isVectorTy()) |
836 | if (const auto *C = dyn_cast<Constant>(V)) |
837 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) |
838 | return CFP->isExactlyValue(Val); |
839 | return false; |
840 | } |
841 | }; |
842 | |
843 | /// Match a specific floating point value or vector with all elements |
844 | /// equal to the value. |
845 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } |
846 | |
847 | /// Match a float 1.0 or vector with all elements equal to 1.0. |
848 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } |
849 | |
850 | struct bind_const_intval_ty { |
851 | uint64_t &VR; |
852 | |
853 | bind_const_intval_ty(uint64_t &V) : VR(V) {} |
854 | |
855 | template <typename ITy> bool match(ITy *V) { |
856 | if (const auto *CV = dyn_cast<ConstantInt>(V)) |
857 | if (CV->getValue().ule(UINT64_MAX)) {
858 | VR = CV->getZExtValue(); |
859 | return true; |
860 | } |
861 | return false; |
862 | } |
863 | }; |
864 | |
865 | /// Match a specified integer value or a vector with all elements equal to
866 | /// that value.
867 | template <bool AllowUndefs> |
868 | struct specific_intval { |
869 | APInt Val; |
870 | |
871 | specific_intval(APInt V) : Val(std::move(V)) {} |
872 | |
873 | template <typename ITy> bool match(ITy *V) { |
874 | const auto *CI = dyn_cast<ConstantInt>(V); |
875 | if (!CI && V->getType()->isVectorTy()) |
876 | if (const auto *C = dyn_cast<Constant>(V)) |
877 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs)); |
878 | |
879 | return CI && APInt::isSameValue(CI->getValue(), Val); |
880 | } |
881 | }; |
882 | |
883 | /// Match a specific integer value or vector with all elements equal to |
884 | /// the value. |
885 | inline specific_intval<false> m_SpecificInt(APInt V) { |
886 | return specific_intval<false>(std::move(V)); |
887 | } |
888 | |
889 | inline specific_intval<false> m_SpecificInt(uint64_t V) { |
890 | return m_SpecificInt(APInt(64, V)); |
891 | } |
892 | |
893 | inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) { |
894 | return specific_intval<true>(std::move(V)); |
895 | } |
896 | |
897 | inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) { |
898 | return m_SpecificIntAllowUndef(APInt(64, V)); |
899 | } |
900 | |
901 | /// Match a ConstantInt and bind to its value. This does not match |
902 | /// ConstantInts wider than 64-bits. |
903 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } |
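// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the specific/bound integer matchers above; the function
// name and parameters are hypothetical.
static bool matchIntConstant(llvm::Value *V, uint64_t &Bound) {
  using namespace llvm::PatternMatch;
  // First try the exact value 42 (scalar or splat vector), then fall back
  // to binding any ConstantInt that fits in 64 bits.
  if (match(V, m_SpecificInt(42)))
    return true;
  return match(V, m_ConstantInt(Bound));
}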
904 | |
905 | /// Match a specified basic block value. |
906 | struct specific_bbval { |
907 | BasicBlock *Val; |
908 | |
909 | specific_bbval(BasicBlock *Val) : Val(Val) {} |
910 | |
911 | template <typename ITy> bool match(ITy *V) { |
912 | const auto *BB = dyn_cast<BasicBlock>(V); |
913 | return BB && BB == Val; |
914 | } |
915 | }; |
916 | |
917 | /// Match a specific basic block value. |
918 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { |
919 | return specific_bbval(BB); |
920 | } |
921 | |
922 | /// A commutative-friendly version of m_Specific(). |
923 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { |
924 | return BB; |
925 | } |
926 | inline deferredval_ty<const BasicBlock> |
927 | m_Deferred(const BasicBlock *const &BB) { |
928 | return BB; |
929 | } |
930 | |
931 | //===----------------------------------------------------------------------===// |
932 | // Matcher for any binary operator. |
933 | // |
934 | template <typename LHS_t, typename RHS_t, bool Commutable = false> |
935 | struct AnyBinaryOp_match { |
936 | LHS_t L; |
937 | RHS_t R; |
938 | |
939 | // The evaluation order is always stable, regardless of Commutability. |
940 | // The LHS is always matched first. |
941 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
942 | |
943 | template <typename OpTy> bool match(OpTy *V) { |
944 | if (auto *I = dyn_cast<BinaryOperator>(V)) |
945 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
946 | (Commutable && L.match(I->getOperand(1)) && |
947 | R.match(I->getOperand(0))); |
948 | return false; |
949 | } |
950 | }; |
951 | |
952 | template <typename LHS, typename RHS> |
953 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { |
954 | return AnyBinaryOp_match<LHS, RHS>(L, R); |
955 | } |
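// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the any-binary-operator matcher above; names are
// hypothetical.
static bool isBinOpWithConstantRHS(llvm::Value *V, llvm::Constant *&C) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  // Matches any BinaryOperator whose second operand is a Constant,
  // regardless of opcode, binding both pieces for the caller.
  return match(V, m_BinOp(m_Value(X), m_Constant(C)));
}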
956 | |
957 | //===----------------------------------------------------------------------===// |
958 | // Matcher for any unary operator. |
959 | // TODO fuse unary, binary matcher into n-ary matcher |
960 | // |
961 | template <typename OP_t> struct AnyUnaryOp_match { |
962 | OP_t X; |
963 | |
964 | AnyUnaryOp_match(const OP_t &X) : X(X) {} |
965 | |
966 | template <typename OpTy> bool match(OpTy *V) { |
967 | if (auto *I = dyn_cast<UnaryOperator>(V)) |
968 | return X.match(I->getOperand(0)); |
969 | return false; |
970 | } |
971 | }; |
972 | |
973 | template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) { |
974 | return AnyUnaryOp_match<OP_t>(X); |
975 | } |
976 | |
977 | //===----------------------------------------------------------------------===// |
978 | // Matchers for specific binary operators. |
979 | // |
980 | |
981 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
982 | bool Commutable = false> |
983 | struct BinaryOp_match { |
984 | LHS_t L; |
985 | RHS_t R; |
986 | |
987 | // The evaluation order is always stable, regardless of Commutability. |
988 | // The LHS is always matched first. |
989 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
990 | |
991 | template <typename OpTy> bool match(OpTy *V) { |
992 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
993 | auto *I = cast<BinaryOperator>(V); |
994 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
995 | (Commutable && L.match(I->getOperand(1)) && |
996 | R.match(I->getOperand(0))); |
997 | } |
998 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
999 | return CE->getOpcode() == Opcode && |
1000 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || |
1001 | (Commutable && L.match(CE->getOperand(1)) && |
1002 | R.match(CE->getOperand(0)))); |
1003 | return false; |
1004 | } |
1005 | }; |
1006 | |
1007 | template <typename LHS, typename RHS> |
1008 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, |
1009 | const RHS &R) { |
1010 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); |
1011 | } |
1012 | |
1013 | template <typename LHS, typename RHS> |
1014 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, |
1015 | const RHS &R) { |
1016 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); |
1017 | } |
1018 | |
1019 | template <typename LHS, typename RHS> |
1020 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, |
1021 | const RHS &R) { |
1022 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); |
1023 | } |
1024 | |
1025 | template <typename LHS, typename RHS> |
1026 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, |
1027 | const RHS &R) { |
1028 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); |
1029 | } |
1030 | |
1031 | template <typename Op_t> struct FNeg_match { |
1032 | Op_t X; |
1033 | |
1034 | FNeg_match(const Op_t &Op) : X(Op) {} |
1035 | template <typename OpTy> bool match(OpTy *V) { |
1036 | auto *FPMO = dyn_cast<FPMathOperator>(V); |
1037 | if (!FPMO) return false; |
1038 | |
1039 | if (FPMO->getOpcode() == Instruction::FNeg) |
1040 | return X.match(FPMO->getOperand(0)); |
1041 | |
1042 | if (FPMO->getOpcode() == Instruction::FSub) { |
1043 | if (FPMO->hasNoSignedZeros()) { |
1044 | // With 'nsz', any zero goes. |
1045 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) |
1046 | return false; |
1047 | } else { |
1048 | // Without 'nsz', we need fsub -0.0, X exactly. |
1049 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) |
1050 | return false; |
1051 | } |
1052 | |
1053 | return X.match(FPMO->getOperand(1)); |
1054 | } |
1055 | |
1056 | return false; |
1057 | } |
1058 | }; |
1059 | |
1060 | /// Match 'fneg X' as 'fsub -0.0, X'. |
1061 | template <typename OpTy> |
1062 | inline FNeg_match<OpTy> |
1063 | m_FNeg(const OpTy &X) { |
1064 | return FNeg_match<OpTy>(X); |
1065 | } |
1066 | |
1067 | /// Match 'fneg X' as 'fsub +-0.0, X'. |
1068 | template <typename RHS> |
1069 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> |
1070 | m_FNegNSZ(const RHS &X) { |
1071 | return m_FSub(m_AnyZeroFP(), X); |
1072 | } |
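// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_FNeg; the function name and the caller-provided X are
// hypothetical.
static bool isFNegOf(llvm::Value *V, llvm::Value *X) {
  using namespace llvm::PatternMatch;
  // Matches a real fneg instruction as well as the "fsub -0.0, X" idiom
  // (and "fsub 0.0, X" when the nsz flag is present), per the logic above.
  return match(V, m_FNeg(m_Specific(X)));
}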
1073 | |
1074 | template <typename LHS, typename RHS> |
1075 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, |
1076 | const RHS &R) { |
1077 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); |
1078 | } |
1079 | |
1080 | template <typename LHS, typename RHS> |
1081 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, |
1082 | const RHS &R) { |
1083 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); |
1084 | } |
1085 | |
1086 | template <typename LHS, typename RHS> |
1087 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, |
1088 | const RHS &R) { |
1089 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); |
1090 | } |
1091 | |
1092 | template <typename LHS, typename RHS> |
1093 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, |
1094 | const RHS &R) { |
1095 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); |
1096 | } |
1097 | |
1098 | template <typename LHS, typename RHS> |
1099 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, |
1100 | const RHS &R) { |
1101 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); |
1102 | } |
1103 | |
1104 | template <typename LHS, typename RHS> |
1105 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, |
1106 | const RHS &R) { |
1107 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); |
1108 | } |
1109 | |
1110 | template <typename LHS, typename RHS> |
1111 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, |
1112 | const RHS &R) { |
1113 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); |
1114 | } |
1115 | |
1116 | template <typename LHS, typename RHS> |
1117 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, |
1118 | const RHS &R) { |
1119 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); |
1120 | } |
1121 | |
1122 | template <typename LHS, typename RHS> |
1123 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, |
1124 | const RHS &R) { |
1125 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); |
1126 | } |
1127 | |
1128 | template <typename LHS, typename RHS> |
1129 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, |
1130 | const RHS &R) { |
1131 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); |
1132 | } |
1133 | |
1134 | template <typename LHS, typename RHS> |
1135 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, |
1136 | const RHS &R) { |
1137 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); |
1138 | } |
1139 | |
1140 | template <typename LHS, typename RHS> |
1141 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, |
1142 | const RHS &R) { |
1143 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); |
1144 | } |
1145 | |
1146 | template <typename LHS, typename RHS> |
1147 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, |
1148 | const RHS &R) { |
1149 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); |
1150 | } |
1151 | |
1152 | template <typename LHS, typename RHS> |
1153 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, |
1154 | const RHS &R) { |
1155 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); |
1156 | } |
1157 | |
1158 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
1159 | unsigned WrapFlags = 0> |
1160 | struct OverflowingBinaryOp_match { |
1161 | LHS_t L; |
1162 | RHS_t R; |
1163 | |
1164 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) |
1165 | : L(LHS), R(RHS) {} |
1166 | |
1167 | template <typename OpTy> bool match(OpTy *V) { |
1168 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { |
1169 | if (Op->getOpcode() != Opcode) |
1170 | return false; |
1171 | if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) && |
1172 | !Op->hasNoUnsignedWrap()) |
1173 | return false; |
1174 | if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) && |
1175 | !Op->hasNoSignedWrap()) |
1176 | return false; |
1177 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); |
1178 | } |
1179 | return false; |
1180 | } |
1181 | }; |
1182 | |
1183 | template <typename LHS, typename RHS> |
1184 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1185 | OverflowingBinaryOperator::NoSignedWrap> |
1186 | m_NSWAdd(const LHS &L, const RHS &R) { |
1187 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1188 | OverflowingBinaryOperator::NoSignedWrap>( |
1189 | L, R); |
1190 | } |
1191 | template <typename LHS, typename RHS> |
1192 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1193 | OverflowingBinaryOperator::NoSignedWrap> |
1194 | m_NSWSub(const LHS &L, const RHS &R) { |
1195 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1196 | OverflowingBinaryOperator::NoSignedWrap>( |
1197 | L, R); |
1198 | } |
1199 | template <typename LHS, typename RHS> |
1200 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1201 | OverflowingBinaryOperator::NoSignedWrap> |
1202 | m_NSWMul(const LHS &L, const RHS &R) { |
1203 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1204 | OverflowingBinaryOperator::NoSignedWrap>( |
1205 | L, R); |
1206 | } |
1207 | template <typename LHS, typename RHS> |
1208 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1209 | OverflowingBinaryOperator::NoSignedWrap> |
1210 | m_NSWShl(const LHS &L, const RHS &R) { |
1211 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1212 | OverflowingBinaryOperator::NoSignedWrap>( |
1213 | L, R); |
1214 | } |
1215 | |
1216 | template <typename LHS, typename RHS> |
1217 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1218 | OverflowingBinaryOperator::NoUnsignedWrap> |
1219 | m_NUWAdd(const LHS &L, const RHS &R) { |
1220 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1221 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1222 | L, R); |
1223 | } |
1224 | template <typename LHS, typename RHS> |
1225 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1226 | OverflowingBinaryOperator::NoUnsignedWrap> |
1227 | m_NUWSub(const LHS &L, const RHS &R) { |
1228 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1229 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1230 | L, R); |
1231 | } |
1232 | template <typename LHS, typename RHS> |
1233 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1234 | OverflowingBinaryOperator::NoUnsignedWrap> |
1235 | m_NUWMul(const LHS &L, const RHS &R) { |
1236 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1237 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1238 | L, R); |
1239 | } |
1240 | template <typename LHS, typename RHS> |
1241 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1242 | OverflowingBinaryOperator::NoUnsignedWrap> |
1243 | m_NUWShl(const LHS &L, const RHS &R) { |
1244 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1245 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1246 | L, R); |
1247 | } |
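// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the wrap-flag-aware matchers above; names are hypothetical.
static bool isNSWIncrement(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches "add nsw X, 1"; the matcher rejects adds without the
  // no-signed-wrap flag, and operands are not commuted, so the constant
  // must be the second operand.
  return match(V, m_NSWAdd(m_Value(X), m_One()));
}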
1248 | |
1249 | //===----------------------------------------------------------------------===// |
1250 | // Class that matches a group of binary opcodes. |
1251 | // |
1252 | template <typename LHS_t, typename RHS_t, typename Predicate> |
1253 | struct BinOpPred_match : Predicate { |
1254 | LHS_t L; |
1255 | RHS_t R; |
1256 | |
1257 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1258 | |
1259 | template <typename OpTy> bool match(OpTy *V) { |
1260 | if (auto *I = dyn_cast<Instruction>(V)) |
1261 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && |
1262 | R.match(I->getOperand(1)); |
1263 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
1264 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && |
1265 | R.match(CE->getOperand(1)); |
1266 | return false; |
1267 | } |
1268 | }; |
1269 | |
1270 | struct is_shift_op { |
1271 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } |
1272 | }; |
1273 | |
1274 | struct is_right_shift_op { |
1275 | bool isOpType(unsigned Opcode) { |
1276 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; |
1277 | } |
1278 | }; |
1279 | |
1280 | struct is_logical_shift_op { |
1281 | bool isOpType(unsigned Opcode) { |
1282 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; |
1283 | } |
1284 | }; |
1285 | |
1286 | struct is_bitwiselogic_op { |
1287 | bool isOpType(unsigned Opcode) { |
1288 | return Instruction::isBitwiseLogicOp(Opcode); |
1289 | } |
1290 | }; |
1291 | |
1292 | struct is_idiv_op { |
1293 | bool isOpType(unsigned Opcode) { |
1294 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
1295 | } |
1296 | }; |
1297 | |
1298 | struct is_irem_op { |
1299 | bool isOpType(unsigned Opcode) { |
1300 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; |
1301 | } |
1302 | }; |
1303 | |
1304 | /// Matches shift operations. |
1305 | template <typename LHS, typename RHS> |
1306 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, |
1307 | const RHS &R) { |
1308 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); |
1309 | } |
1310 | |
1311 | /// Matches right shift operations (lshr or ashr).
1312 | template <typename LHS, typename RHS> |
1313 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, |
1314 | const RHS &R) { |
1315 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); |
1316 | } |
1317 | |
1318 | /// Matches logical shift operations. |
1319 | template <typename LHS, typename RHS> |
1320 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> |
1321 | m_LogicalShift(const LHS &L, const RHS &R) { |
1322 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); |
1323 | } |
1324 | |
1325 | /// Matches bitwise logic operations. |
1326 | template <typename LHS, typename RHS> |
1327 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> |
1328 | m_BitwiseLogic(const LHS &L, const RHS &R) { |
1329 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); |
1330 | } |
1331 | |
1332 | /// Matches integer division operations. |
1333 | template <typename LHS, typename RHS> |
1334 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, |
1335 | const RHS &R) { |
1336 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); |
1337 | } |
1338 | |
1339 | /// Matches integer remainder operations. |
1340 | template <typename LHS, typename RHS> |
1341 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, |
1342 | const RHS &R) { |
1343 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); |
1344 | } |
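// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the opcode-group matchers above; names are hypothetical.
static bool isShiftOfLoad(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *Ptr, *Amt;
  // m_Shift accepts shl, lshr and ashr; here the shifted value must come
  // directly from a load instruction.
  return match(V, m_Shift(m_Load(m_Value(Ptr)), m_Value(Amt)));
}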
1345 | |
1346 | //===----------------------------------------------------------------------===// |
1347 | // Class that matches exact binary ops. |
1348 | // |
1349 | template <typename SubPattern_t> struct Exact_match { |
1350 | SubPattern_t SubPattern; |
1351 | |
1352 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} |
1353 | |
1354 | template <typename OpTy> bool match(OpTy *V) { |
1355 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) |
1356 | return PEO->isExact() && SubPattern.match(V); |
1357 | return false; |
1358 | } |
1359 | }; |
1360 | |
1361 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { |
1362 | return SubPattern; |
1363 | } |
1364 | |
1365 | //===----------------------------------------------------------------------===// |
1366 | // Matchers for CmpInst classes |
1367 | // |
1368 | |
1369 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, |
1370 | bool Commutable = false> |
1371 | struct CmpClass_match { |
1372 | PredicateTy &Predicate; |
1373 | LHS_t L; |
1374 | RHS_t R; |
1375 | |
1376 | // The evaluation order is always stable, regardless of Commutability. |
1377 | // The LHS is always matched first. |
1378 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) |
1379 | : Predicate(Pred), L(LHS), R(RHS) {} |
1380 | |
1381 | template <typename OpTy> bool match(OpTy *V) { |
1382 | if (auto *I = dyn_cast<Class>(V)) { |
1383 | if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { |
1384 | Predicate = I->getPredicate(); |
1385 | return true; |
1386 | } else if (Commutable && L.match(I->getOperand(1)) && |
1387 | R.match(I->getOperand(0))) { |
1388 | Predicate = I->getSwappedPredicate(); |
1389 | return true; |
1390 | } |
1391 | } |
1392 | return false; |
1393 | } |
1394 | }; |
1395 | |
1396 | template <typename LHS, typename RHS> |
1397 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> |
1398 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1399 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); |
1400 | } |
1401 | |
1402 | template <typename LHS, typename RHS> |
1403 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> |
1404 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1405 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); |
1406 | } |
1407 | |
1408 | template <typename LHS, typename RHS> |
1409 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> |
1410 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1411 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); |
1412 | } |
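// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the compare matchers above; names are hypothetical.
static bool isICmpWithZero(llvm::Value *V, llvm::ICmpInst::Predicate &Pred) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  // Matches "icmp <Pred> X, 0" and reports the predicate to the caller.
  return match(V, m_ICmp(Pred, m_Value(X), m_ZeroInt()));
}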
1413 | |
1414 | //===----------------------------------------------------------------------===// |
1415 | // Matchers for instructions with a given opcode and number of operands. |
1416 | // |
1417 | |
1418 | /// Matches instructions with Opcode and one operand.
1419 | template <typename T0, unsigned Opcode> struct OneOps_match { |
1420 | T0 Op1; |
1421 | |
1422 | OneOps_match(const T0 &Op1) : Op1(Op1) {} |
1423 | |
1424 | template <typename OpTy> bool match(OpTy *V) { |
1425 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1426 | auto *I = cast<Instruction>(V); |
1427 | return Op1.match(I->getOperand(0)); |
1428 | } |
1429 | return false; |
1430 | } |
1431 | }; |
1432 | |
1433 | /// Matches instructions with Opcode and two operands.
1434 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { |
1435 | T0 Op1; |
1436 | T1 Op2; |
1437 | |
1438 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} |
1439 | |
1440 | template <typename OpTy> bool match(OpTy *V) { |
1441 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1442 | auto *I = cast<Instruction>(V); |
1443 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); |
1444 | } |
1445 | return false; |
1446 | } |
1447 | }; |
1448 | |
1449 | /// Matches instructions with Opcode and three operands. |
1450 | template <typename T0, typename T1, typename T2, unsigned Opcode> |
1451 | struct ThreeOps_match { |
1452 | T0 Op1; |
1453 | T1 Op2; |
1454 | T2 Op3; |
1455 | |
1456 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) |
1457 | : Op1(Op1), Op2(Op2), Op3(Op3) {} |
1458 | |
1459 | template <typename OpTy> bool match(OpTy *V) { |
1460 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1461 | auto *I = cast<Instruction>(V); |
1462 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && |
1463 | Op3.match(I->getOperand(2)); |
1464 | } |
1465 | return false; |
1466 | } |
1467 | }; |
1468 | |
1469 | /// Matches SelectInst. |
1470 | template <typename Cond, typename LHS, typename RHS> |
1471 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> |
1472 | m_Select(const Cond &C, const LHS &L, const RHS &R) { |
1473 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); |
1474 | } |
1475 | |
1476 | /// This matches a select of two constants, e.g.: |
1477 | /// m_SelectCst<-1, 0>(m_Value(V)) |
1478 | template <int64_t L, int64_t R, typename Cond> |
1479 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, |
1480 | Instruction::Select> |
1481 | m_SelectCst(const Cond &C) { |
1482 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); |
1483 | } |
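// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_Select / m_SelectCst above; names are hypothetical.
static bool isBoolToAllOnesMask(llvm::Value *V, llvm::Value *&Cond) {
  using namespace llvm::PatternMatch;
  // Matches "select Cond, -1, 0", i.e. a condition widened into an
  // all-ones / all-zeros mask, using the constant-select helper above.
  return match(V, m_SelectCst<-1, 0>(m_Value(Cond)));
}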
1484 | |
1485 | /// Matches FreezeInst. |
1486 | template <typename OpTy> |
1487 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { |
1488 | return OneOps_match<OpTy, Instruction::Freeze>(Op); |
1489 | } |
1490 | |
1491 | /// Matches InsertElementInst. |
1492 | template <typename Val_t, typename Elt_t, typename Idx_t> |
1493 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> |
1494 | m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { |
1495 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( |
1496 | Val, Elt, Idx); |
1497 | } |
1498 | |
1499 | /// Matches ExtractElementInst. |
1500 | template <typename Val_t, typename Idx_t> |
1501 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> |
1502 | m_ExtractElt(const Val_t &Val, const Idx_t &Idx) { |
1503 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); |
1504 | } |
1505 | |
1506 | /// Matches shuffle. |
1507 | template <typename T0, typename T1, typename T2> struct Shuffle_match { |
1508 | T0 Op1; |
1509 | T1 Op2; |
1510 | T2 Mask; |
1511 | |
1512 | Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask) |
1513 | : Op1(Op1), Op2(Op2), Mask(Mask) {} |
1514 | |
1515 | template <typename OpTy> bool match(OpTy *V) { |
1516 | if (auto *I = dyn_cast<ShuffleVectorInst>(V)) { |
1517 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && |
1518 | Mask.match(I->getShuffleMask()); |
1519 | } |
1520 | return false; |
1521 | } |
1522 | }; |
1523 | |
1524 | struct m_Mask { |
1525 | ArrayRef<int> &MaskRef; |
1526 | m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} |
1527 | bool match(ArrayRef<int> Mask) { |
1528 | MaskRef = Mask; |
1529 | return true; |
1530 | } |
1531 | }; |
1532 | |
1533 | struct m_ZeroMask { |
1534 | bool match(ArrayRef<int> Mask) { |
1535 | return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; }); |
1536 | } |
1537 | }; |
1538 | |
1539 | struct m_SpecificMask { |
1540 | ArrayRef<int> &MaskRef; |
1541 | m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} |
1542 | bool match(ArrayRef<int> Mask) { return MaskRef == Mask; } |
1543 | }; |
1544 | |
1545 | struct m_SplatOrUndefMask { |
1546 | int &SplatIndex; |
1547 | m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {} |
1548 | bool match(ArrayRef<int> Mask) { |
1549 | auto First = find_if(Mask, [](int Elem) { return Elem != -1; }); |
1550 | if (First == Mask.end()) |
1551 | return false; |
1552 | SplatIndex = *First; |
1553 | return all_of(Mask, |
1554 | [First](int Elem) { return Elem == *First || Elem == -1; }); |
1555 | } |
1556 | }; |
1557 | |
1558 | /// Matches ShuffleVectorInst independently of mask value. |
1559 | template <typename V1_t, typename V2_t> |
1560 | inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector> |
1561 | m_Shuffle(const V1_t &v1, const V2_t &v2) { |
1562 | return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2); |
1563 | } |
1564 | |
1565 | template <typename V1_t, typename V2_t, typename Mask_t> |
1566 | inline Shuffle_match<V1_t, V2_t, Mask_t> |
1567 | m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) { |
1568 | return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask); |
1569 | } |
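// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the shuffle matchers above; names are hypothetical.
static bool isSplatOfLaneZero(llvm::Value *V, llvm::Value *&Src) {
  using namespace llvm::PatternMatch;
  // Matches a shufflevector that broadcasts element 0 of Src; m_ZeroMask
  // allows undef (-1) lanes in the mask, and the second vector operand is
  // ignored here.
  return match(V, m_Shuffle(m_Value(Src), m_Value(), m_ZeroMask()));
}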
1570 | |
1571 | /// Matches LoadInst. |
1572 | template <typename OpTy> |
1573 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { |
1574 | return OneOps_match<OpTy, Instruction::Load>(Op); |
1575 | } |
1576 | |
1577 | /// Matches StoreInst. |
1578 | template <typename ValueOpTy, typename PointerOpTy> |
1579 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> |
1580 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { |
1581 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, |
1582 | PointerOp); |
1583 | } |
1584 | |
1585 | //===----------------------------------------------------------------------===// |
1586 | // Matchers for CastInst classes |
1587 | // |
1588 | |
1589 | template <typename Op_t, unsigned Opcode> struct CastClass_match { |
1590 | Op_t Op; |
1591 | |
1592 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} |
1593 | |
1594 | template <typename OpTy> bool match(OpTy *V) { |
1595 | if (auto *O = dyn_cast<Operator>(V)) |
1596 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); |
1597 | return false; |
1598 | } |
1599 | }; |
1600 | |
1601 | /// Matches BitCast. |
1602 | template <typename OpTy> |
1603 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { |
1604 | return CastClass_match<OpTy, Instruction::BitCast>(Op); |
1605 | } |
1606 | |
1607 | /// Matches PtrToInt. |
1608 | template <typename OpTy> |
1609 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { |
1610 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); |
1611 | } |
1612 | |
1613 | /// Matches IntToPtr. |
1614 | template <typename OpTy> |
1615 | inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) { |
1616 | return CastClass_match<OpTy, Instruction::IntToPtr>(Op); |
1617 | } |
1618 | |
1619 | /// Matches Trunc. |
1620 | template <typename OpTy> |
1621 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { |
1622 | return CastClass_match<OpTy, Instruction::Trunc>(Op); |
1623 | } |
1624 | |
1625 | template <typename OpTy> |
1626 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> |
1627 | m_TruncOrSelf(const OpTy &Op) { |
1628 | return m_CombineOr(m_Trunc(Op), Op); |
1629 | } |
1630 | |
1631 | /// Matches SExt. |
1632 | template <typename OpTy> |
1633 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { |
1634 | return CastClass_match<OpTy, Instruction::SExt>(Op); |
1635 | } |
1636 | |
1637 | /// Matches ZExt. |
1638 | template <typename OpTy> |
1639 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { |
1640 | return CastClass_match<OpTy, Instruction::ZExt>(Op); |
1641 | } |
1642 | |
1643 | template <typename OpTy> |
1644 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> |
1645 | m_ZExtOrSelf(const OpTy &Op) { |
1646 | return m_CombineOr(m_ZExt(Op), Op); |
1647 | } |
1648 | |
1649 | template <typename OpTy> |
1650 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> |
1651 | m_SExtOrSelf(const OpTy &Op) { |
1652 | return m_CombineOr(m_SExt(Op), Op); |
1653 | } |
1654 | |
1655 | template <typename OpTy> |
1656 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1657 | CastClass_match<OpTy, Instruction::SExt>> |
1658 | m_ZExtOrSExt(const OpTy &Op) { |
1659 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); |
1660 | } |
1661 | |
1662 | template <typename OpTy> |
1663 | inline match_combine_or< |
1664 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1665 | CastClass_match<OpTy, Instruction::SExt>>, |
1666 | OpTy> |
1667 | m_ZExtOrSExtOrSelf(const OpTy &Op) { |
1668 | return m_CombineOr(m_ZExtOrSExt(Op), Op); |
1669 | } |
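// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the cast matchers above; names are hypothetical.
static bool isZExtOrSExtOfValue(llvm::Value *V, llvm::Value *&Src) {
  using namespace llvm::PatternMatch;
  // Matches "zext Src" or "sext Src"; whether Src is an i1 (or anything
  // else) is left to the caller to check.
  return match(V, m_ZExtOrSExt(m_Value(Src)));
}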
1670 | |
1671 | template <typename OpTy> |
1672 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { |
1673 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); |
1674 | } |
1675 | |
1676 | template <typename OpTy> |
1677 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { |
1678 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); |
1679 | } |
1680 | |
1681 | template <typename OpTy> |
1682 | inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) { |
1683 | return CastClass_match<OpTy, Instruction::FPToUI>(Op); |
1684 | } |
1685 | |
1686 | template <typename OpTy> |
1687 | inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) { |
1688 | return CastClass_match<OpTy, Instruction::FPToSI>(Op); |
1689 | } |
1690 | |
1691 | template <typename OpTy> |
1692 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { |
1693 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); |
1694 | } |
1695 | |
1696 | template <typename OpTy> |
1697 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { |
1698 | return CastClass_match<OpTy, Instruction::FPExt>(Op); |
1699 | } |
1700 | |
1701 | //===----------------------------------------------------------------------===// |
1702 | // Matchers for control flow. |
1703 | // |
1704 | |
1705 | struct br_match { |
1706 | BasicBlock *&Succ; |
1707 | |
1708 | br_match(BasicBlock *&Succ) : Succ(Succ) {} |
1709 | |
1710 | template <typename OpTy> bool match(OpTy *V) { |
1711 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1712 | if (BI->isUnconditional()) { |
1713 | Succ = BI->getSuccessor(0); |
1714 | return true; |
1715 | } |
1716 | return false; |
1717 | } |
1718 | }; |
1719 | |
1720 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } |
1721 | |
1722 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1723 | struct brc_match { |
1724 | Cond_t Cond; |
1725 | TrueBlock_t T; |
1726 | FalseBlock_t F; |
1727 | |
1728 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) |
1729 | : Cond(C), T(t), F(f) {} |
1730 | |
1731 | template <typename OpTy> bool match(OpTy *V) { |
1732 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1733 | if (BI->isConditional() && Cond.match(BI->getCondition())) |
1734 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); |
1735 | return false; |
1736 | } |
1737 | }; |
1738 | |
1739 | template <typename Cond_t> |
1740 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> |
1741 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { |
1742 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( |
1743 | C, m_BasicBlock(T), m_BasicBlock(F)); |
1744 | } |
1745 | |
1746 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1747 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> |
1748 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { |
1749 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); |
1750 | } |
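// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the branch matchers above; names are hypothetical.
static bool isBranchOnICmp(llvm::Instruction *Term, llvm::BasicBlock *&TrueBB,
                           llvm::BasicBlock *&FalseBB) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::Value *L, *R;
  // Matches a conditional branch whose condition is an integer compare,
  // capturing the two successor blocks.
  return match(Term,
               m_Br(m_ICmp(Pred, m_Value(L), m_Value(R)), TrueBB, FalseBB));
}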
1751 | |
1752 | //===----------------------------------------------------------------------===// |
1753 | // Matchers for max/min idioms, e.g.: "select (sgt x, y), x, y" -> smax(x,y).
1754 | // |
1755 | |
1756 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, |
1757 | bool Commutable = false> |
1758 | struct MaxMin_match { |
1759 | using PredType = Pred_t; |
1760 | LHS_t L; |
1761 | RHS_t R; |
1762 | |
1763 | // The evaluation order is always stable, regardless of Commutability. |
1764 | // The LHS is always matched first. |
1765 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1766 | |
1767 | template <typename OpTy> bool match(OpTy *V) { |
1768 | if (auto *II = dyn_cast<IntrinsicInst>(V)) { |
1769 | Intrinsic::ID IID = II->getIntrinsicID(); |
1770 | if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) || |
1771 | (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) || |
1772 | (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) || |
1773 | (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) { |
1774 | Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); |
1775 | return (L.match(LHS) && R.match(RHS)) || |
1776 | (Commutable && L.match(RHS) && R.match(LHS)); |
1777 | } |
1778 | } |
1779 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". |
1780 | auto *SI = dyn_cast<SelectInst>(V); |
1781 | if (!SI) |
1782 | return false; |
1783 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); |
1784 | if (!Cmp) |
1785 | return false; |
1786 | // At this point we have a select conditioned on a comparison. Check that |
1787 | // it is the values returned by the select that are being compared. |
1788 | auto *TrueVal = SI->getTrueValue(); |
1789 | auto *FalseVal = SI->getFalseValue(); |
1790 | auto *LHS = Cmp->getOperand(0); |
1791 | auto *RHS = Cmp->getOperand(1); |
1792 | if ((TrueVal != LHS || FalseVal != RHS) && |
1793 | (TrueVal != RHS || FalseVal != LHS)) |
1794 | return false; |
1795 | typename CmpInst_t::Predicate Pred = |
1796 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); |
1797 | // Does "(x pred y) ? x : y" represent the desired max/min operation? |
1798 | if (!Pred_t::match(Pred)) |
1799 | return false; |
1800 | // It does! Bind the operands. |
1801 | return (L.match(LHS) && R.match(RHS)) || |
1802 | (Commutable && L.match(RHS) && R.match(LHS)); |
1803 | } |
1804 | }; |
1805 | |
1806 | /// Helper class for identifying signed max predicates. |
1807 | struct smax_pred_ty { |
1808 | static bool match(ICmpInst::Predicate Pred) { |
1809 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; |
1810 | } |
1811 | }; |
1812 | |
1813 | /// Helper class for identifying signed min predicates. |
1814 | struct smin_pred_ty { |
1815 | static bool match(ICmpInst::Predicate Pred) { |
1816 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; |
1817 | } |
1818 | }; |
1819 | |
1820 | /// Helper class for identifying unsigned max predicates. |
1821 | struct umax_pred_ty { |
1822 | static bool match(ICmpInst::Predicate Pred) { |
1823 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; |
1824 | } |
1825 | }; |
1826 | |
1827 | /// Helper class for identifying unsigned min predicates. |
1828 | struct umin_pred_ty { |
1829 | static bool match(ICmpInst::Predicate Pred) { |
1830 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; |
1831 | } |
1832 | }; |
1833 | |
1834 | /// Helper class for identifying ordered max predicates. |
1835 | struct ofmax_pred_ty { |
1836 | static bool match(FCmpInst::Predicate Pred) { |
1837 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; |
1838 | } |
1839 | }; |
1840 | |
1841 | /// Helper class for identifying ordered min predicates. |
1842 | struct ofmin_pred_ty { |
1843 | static bool match(FCmpInst::Predicate Pred) { |
1844 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; |
1845 | } |
1846 | }; |
1847 | |
1848 | /// Helper class for identifying unordered max predicates. |
1849 | struct ufmax_pred_ty { |
1850 | static bool match(FCmpInst::Predicate Pred) { |
1851 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; |
1852 | } |
1853 | }; |
1854 | |
1855 | /// Helper class for identifying unordered min predicates. |
1856 | struct ufmin_pred_ty { |
1857 | static bool match(FCmpInst::Predicate Pred) { |
1858 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; |
1859 | } |
1860 | }; |
1861 | |
1862 | template <typename LHS, typename RHS> |
1863 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, |
1864 | const RHS &R) { |
1865 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); |
1866 | } |
1867 | |
1868 | template <typename LHS, typename RHS> |
1869 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, |
1870 | const RHS &R) { |
1871 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); |
1872 | } |
1873 | |
1874 | template <typename LHS, typename RHS> |
1875 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, |
1876 | const RHS &R) { |
1877 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); |
1878 | } |
1879 | |
1880 | template <typename LHS, typename RHS> |
1881 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, |
1882 | const RHS &R) { |
1883 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); |
1884 | } |
1885 | |
1886 | template <typename LHS, typename RHS> |
1887 | inline match_combine_or< |
1888 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, |
1889 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>, |
1890 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>, |
1891 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>> |
1892 | m_MaxOrMin(const LHS &L, const RHS &R) { |
1893 | return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)), |
1894 | m_CombineOr(m_UMax(L, R), m_UMin(L, R))); |
1895 | } |
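// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the min/max matchers above; names are hypothetical.
static bool isClampToNonNegative(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches both the "llvm.smax(X, 0)" intrinsic form and the
  // "select (icmp sgt X, 0), X, 0" idiom handled by MaxMin_match.
  return match(V, m_SMax(m_Value(X), m_ZeroInt()));
}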
1896 | |
1897 | /// Match an 'ordered' floating point maximum function. |
1898 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1899 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1900 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1901 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1902 | /// select(fcmp(ogt/oge, L, R), L, R) semantics matched by this predicate.
1903 | /// |
1904 | ///   m_OrdFMax(L, R) = max(L, R)  iff L and R are not NaN
1905 | ///   m_OrdFMax(L, R) = R          iff L or R are NaN
1906 | template <typename LHS, typename RHS> |
1907 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, |
1908 | const RHS &R) { |
1909 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); |
1910 | } |
1911 | |
1912 | /// Match an 'ordered' floating point minimum function. |
1913 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1914 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1915 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1916 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1917 | /// select(fcmp(olt/ole, L, R), L, R) semantics matched by this predicate.
1918 | /// |
1919 | ///   m_OrdFMin(L, R) = min(L, R)  iff L and R are not NaN
1920 | ///   m_OrdFMin(L, R) = R          iff L or R are NaN
1921 | template <typename LHS, typename RHS> |
1922 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, |
1923 | const RHS &R) { |
1924 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); |
1925 | } |
1926 | |
1927 | /// Match an 'unordered' floating point maximum function. |
1928 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1929 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1930 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1931 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1932 | /// select(fcmp(ugt/uge, L, R), L, R) semantics matched by this predicate.
1933 | /// |
1934 | ///   m_UnordFMax(L, R) = max(L, R)  iff L and R are not NaN
1935 | ///   m_UnordFMax(L, R) = L          iff L or R are NaN
1936 | template <typename LHS, typename RHS> |
1937 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> |
1938 | m_UnordFMax(const LHS &L, const RHS &R) { |
1939 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); |
1940 | } |
1941 | |
1942 | /// Match an 'unordered' floating point minimum function. |
1943 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1944 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1945 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1946 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1947 | /// select(fcmp(ult/ule, L, R), L, R) semantics matched by this predicate.
1948 | /// |
1949 | ///   m_UnordFMin(L, R) = min(L, R)  iff L and R are not NaN
1950 | ///   m_UnordFMin(L, R) = L          iff L or R are NaN
1951 | template <typename LHS, typename RHS> |
1952 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> |
1953 | m_UnordFMin(const LHS &L, const RHS &R) { |
1954 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); |
1955 | } |
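// Illustrative contrast between the ordered and unordered forms (a sketch;
// `V`, `L`, and `R` are placeholders): both recognize an fcmp+select min/max
// idiom, but they differ in which operand survives when a NaN is present.
//
//   Value *L, *R;
//   // "select (fcmp ult L, R), L, R": yields L if either input is NaN.
//   if (match(V, m_UnordFMin(m_Value(L), m_Value(R)))) {
//     // Safe to treat as fmin(L, R) only under no-NaNs assumptions.
//   }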
1956 | |
1957 | //===----------------------------------------------------------------------===// |
1958 | // Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b |
1959 | // Note that S might be matched to instructions other than AddInst.
1960 | // |
1961 | |
1962 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
1963 | struct UAddWithOverflow_match { |
1964 | LHS_t L; |
1965 | RHS_t R; |
1966 | Sum_t S; |
1967 | |
1968 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) |
1969 | : L(L), R(R), S(S) {} |
1970 | |
1971 | template <typename OpTy> bool match(OpTy *V) { |
1972 | Value *ICmpLHS, *ICmpRHS; |
1973 | ICmpInst::Predicate Pred; |
1974 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) |
1975 | return false; |
1976 | |
1977 | Value *AddLHS, *AddRHS; |
1978 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); |
1979 | |
1980 | // (a + b) u< a, (a + b) u< b |
1981 | if (Pred == ICmpInst::ICMP_ULT) |
1982 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) |
1983 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
1984 | |
1985 | // a >u (a + b), b >u (a + b) |
1986 | if (Pred == ICmpInst::ICMP_UGT) |
1987 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) |
1988 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
1989 | |
1990 | Value *Op1; |
1991 | auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes())); |
1992 | // (a ^ -1) <u b |
1993 | if (Pred == ICmpInst::ICMP_ULT) { |
1994 | if (XorExpr.match(ICmpLHS)) |
1995 | return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS); |
1996 | } |
1997 |     // b >u (a ^ -1)
1998 | if (Pred == ICmpInst::ICMP_UGT) { |
1999 | if (XorExpr.match(ICmpRHS)) |
2000 | return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS); |
2001 | } |
2002 | |
2003 | // Match special-case for increment-by-1. |
2004 | if (Pred == ICmpInst::ICMP_EQ) { |
2005 | // (a + 1) == 0 |
2006 | // (1 + a) == 0 |
2007 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && |
2008 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
2009 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
2010 | // 0 == (a + 1) |
2011 | // 0 == (1 + a) |
2012 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && |
2013 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
2014 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
2015 | } |
2016 | |
2017 | return false; |
2018 | } |
2019 | }; |
2020 | |
2021 | /// Match an icmp instruction checking for unsigned overflow on addition. |
2022 | /// |
2023 | /// S is matched to the addition whose result is being checked for overflow, and |
2024 | /// L and R are matched to the LHS and RHS of S. |
2025 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
2026 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> |
2027 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { |
2028 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); |
2029 | } |
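// Illustrative use of m_UAddWithOverflow (a sketch; `Cmp`, `A`, `B`, and
// `Sum` are placeholder names):
//
//   Value *A, *B, *Sum;
//   // Recognizes overflow checks such as "(a + b) u< a" or "(a ^ -1) u< b"
//   // and binds Sum to the value standing in for the addition result.
//   if (match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)))) {
//     // Candidate for rewriting to llvm.uadd.with.overflow.
//   }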
2030 | |
2031 | template <typename Opnd_t> struct Argument_match { |
2032 | unsigned OpI; |
2033 | Opnd_t Val; |
2034 | |
2035 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} |
2036 | |
2037 | template <typename OpTy> bool match(OpTy *V) { |
2038 | // FIXME: Should likely be switched to use `CallBase`. |
2039 | if (const auto *CI = dyn_cast<CallInst>(V)) |
2040 | return Val.match(CI->getArgOperand(OpI)); |
2041 | return false; |
2042 | } |
2043 | }; |
2044 | |
2045 | /// Match an argument. |
2046 | template <unsigned OpI, typename Opnd_t> |
2047 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { |
2048 | return Argument_match<Opnd_t>(OpI, Op); |
2049 | } |
2050 | |
2051 | /// Intrinsic matchers. |
2052 | struct IntrinsicID_match { |
2053 | unsigned ID; |
2054 | |
2055 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} |
2056 | |
2057 | template <typename OpTy> bool match(OpTy *V) { |
2058 | if (const auto *CI = dyn_cast<CallInst>(V)) |
2059 | if (const auto *F = CI->getCalledFunction()) |
2060 | return F->getIntrinsicID() == ID; |
2061 | return false; |
2062 | } |
2063 | }; |
2064 | |
2065 | /// Intrinsic matches are combinations of ID matchers and argument
2066 | /// matchers. Higher arity matchers are defined recursively in terms of
2067 | /// and-ing them with lower arity matchers. Here are some convenient typedefs
2068 | /// for up to several arguments; more can be added as needed.
2069 | template <typename T0 = void, typename T1 = void, typename T2 = void, |
2070 | typename T3 = void, typename T4 = void, typename T5 = void, |
2071 | typename T6 = void, typename T7 = void, typename T8 = void, |
2072 | typename T9 = void, typename T10 = void> |
2073 | struct m_Intrinsic_Ty; |
2074 | template <typename T0> struct m_Intrinsic_Ty<T0> { |
2075 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; |
2076 | }; |
2077 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { |
2078 | using Ty = |
2079 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; |
2080 | }; |
2081 | template <typename T0, typename T1, typename T2> |
2082 | struct m_Intrinsic_Ty<T0, T1, T2> { |
2083 | using Ty = |
2084 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, |
2085 | Argument_match<T2>>; |
2086 | }; |
2087 | template <typename T0, typename T1, typename T2, typename T3> |
2088 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { |
2089 | using Ty = |
2090 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, |
2091 | Argument_match<T3>>; |
2092 | }; |
2093 | |
2094 | template <typename T0, typename T1, typename T2, typename T3, typename T4> |
2095 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { |
2096 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, |
2097 | Argument_match<T4>>; |
2098 | }; |
2099 | |
2100 | template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> |
2101 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> { |
2102 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty, |
2103 | Argument_match<T5>>; |
2104 | }; |
2105 | |
2106 | /// Match intrinsic calls like this: |
2107 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) |
2108 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { |
2109 | return IntrinsicID_match(IntrID); |
2110 | } |
2111 | |
2112 | /// Matches MaskedLoad Intrinsic. |
2113 | template <typename Opnd0, typename Opnd1, typename Opnd2, typename Opnd3> |
2114 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2, Opnd3>::Ty |
2115 | m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2, |
2116 | const Opnd3 &Op3) { |
2117 | return m_Intrinsic<Intrinsic::masked_load>(Op0, Op1, Op2, Op3); |
2118 | } |
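// Illustrative use of m_MaskedLoad (a sketch; the bound names are
// placeholders). The operand order follows llvm.masked.load:
// pointer, alignment, mask, passthru.
//
//   Value *Ptr, *Alignment, *Mask, *PassThru;
//   if (match(V, m_MaskedLoad(m_Value(Ptr), m_Value(Alignment), m_Value(Mask),
//                             m_Value(PassThru)))) {
//     // V is a call to llvm.masked.load with its four operands bound above.
//   }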
2119 | |
2120 | template <Intrinsic::ID IntrID, typename T0> |
2121 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { |
2122 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); |
2123 | } |
2124 | |
2125 | template <Intrinsic::ID IntrID, typename T0, typename T1> |
2126 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, |
2127 | const T1 &Op1) { |
2128 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); |
2129 | } |
2130 | |
2131 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> |
2132 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty |
2133 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { |
2134 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); |
2135 | } |
2136 | |
2137 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2138 | typename T3> |
2139 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty |
2140 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { |
2141 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); |
2142 | } |
2143 | |
2144 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2145 | typename T3, typename T4> |
2146 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty |
2147 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, |
2148 | const T4 &Op4) { |
2149 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), |
2150 | m_Argument<4>(Op4)); |
2151 | } |
2152 | |
2153 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2154 | typename T3, typename T4, typename T5> |
2155 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty |
2156 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, |
2157 | const T4 &Op4, const T5 &Op5) { |
2158 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4), |
2159 | m_Argument<5>(Op5)); |
2160 | } |
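// Illustrative multi-operand intrinsic match (a sketch; `V`, `A`, `B`, and
// `C` are placeholders): each overload combines the IntrinsicID_match with
// one Argument_match per operand via m_CombineAnd.
//
//   Value *A, *B, *C;
//   if (match(V, m_Intrinsic<Intrinsic::fma>(m_Value(A), m_Value(B),
//                                            m_Value(C)))) {
//     // V is "call @llvm.fma(A, B, C)".
//   }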
2161 | |
2162 | // Helper intrinsic matching specializations. |
2163 | template <typename Opnd0> |
2164 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { |
2165 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); |
2166 | } |
2167 | |
2168 | template <typename Opnd0> |
2169 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { |
2170 | return m_Intrinsic<Intrinsic::bswap>(Op0); |
2171 | } |
2172 | |
2173 | template <typename Opnd0> |
2174 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { |
2175 | return m_Intrinsic<Intrinsic::fabs>(Op0); |
2176 | } |
2177 | |
2178 | template <typename Opnd0> |
2179 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { |
2180 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); |
2181 | } |
2182 | |
2183 | template <typename Opnd0, typename Opnd1> |
2184 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, |
2185 | const Opnd1 &Op1) { |
2186 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); |
2187 | } |
2188 | |
2189 | template <typename Opnd0, typename Opnd1> |
2190 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, |
2191 | const Opnd1 &Op1) { |
2192 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); |
2193 | } |
2194 | |
2195 | template <typename Opnd0, typename Opnd1, typename Opnd2> |
2196 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty |
2197 | m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { |
2198 | return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2); |
2199 | } |
2200 | |
2201 | template <typename Opnd0, typename Opnd1, typename Opnd2> |
2202 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty |
2203 | m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { |
2204 | return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2); |
2205 | } |
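// Illustrative use of the funnel-shift helpers (a sketch; names are
// placeholders). A rotate is a funnel shift whose two data operands are the
// same value, which m_Deferred can express:
//
//   Value *X, *ShAmt;
//   if (match(V, m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)))) {
//     // V is "call @llvm.fshl(X, X, ShAmt)", i.e. a rotate-left of X.
//   }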
2206 | |
2207 | //===----------------------------------------------------------------------===// |
2208 | // Matchers for two-operands operators with the operators in either order |
2209 | // |
2210 | |
2211 | /// Matches a BinaryOperator with LHS and RHS in either order. |
2212 | template <typename LHS, typename RHS> |
2213 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { |
2214 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); |
2215 | } |
2216 | |
2217 | /// Matches an ICmp with a predicate over LHS and RHS in either order. |
2218 | /// Swaps the predicate if operands are commuted. |
2219 | template <typename LHS, typename RHS> |
2220 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> |
2221 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
2222 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, |
2223 | R); |
2224 | } |
2225 | |
2226 | /// Matches an Add with LHS and RHS in either order.
2227 | template <typename LHS, typename RHS> |
2228 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, |
2229 | const RHS &R) { |
2230 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); |
2231 | } |
2232 | |
2233 | /// Matches a Mul with LHS and RHS in either order. |
2234 | template <typename LHS, typename RHS> |
2235 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, |
2236 | const RHS &R) { |
2237 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); |
2238 | } |
2239 | |
2240 | /// Matches an And with LHS and RHS in either order. |
2241 | template <typename LHS, typename RHS> |
2242 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, |
2243 | const RHS &R) { |
2244 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); |
2245 | } |
2246 | |
2247 | /// Matches an Or with LHS and RHS in either order. |
2248 | template <typename LHS, typename RHS> |
2249 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, |
2250 | const RHS &R) { |
2251 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); |
2252 | } |
2253 | |
2254 | /// Matches an Xor with LHS and RHS in either order. |
2255 | template <typename LHS, typename RHS> |
2256 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, |
2257 | const RHS &R) { |
2258 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); |
2259 | } |
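// Illustrative use of a commutative matcher (a sketch; `V` and `X` are
// placeholders): the 'true' template argument lets the operands match in
// either order.
//
//   Value *X;
//   // Matches both "add X, 1" and "add 1, X".
//   if (match(V, m_c_Add(m_Value(X), m_One()))) {
//     // X is the non-constant addend.
//   }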
2260 | |
2261 | /// Matches a 'Neg' as 'sub 0, V'. |
2262 | template <typename ValTy> |
2263 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> |
2264 | m_Neg(const ValTy &V) { |
2265 | return m_Sub(m_ZeroInt(), V); |
2266 | } |
2267 | |
2268 | /// Matches a 'Neg' as 'sub nsw 0, V'. |
2269 | template <typename ValTy> |
2270 | inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, |
2271 | Instruction::Sub, |
2272 | OverflowingBinaryOperator::NoSignedWrap> |
2273 | m_NSWNeg(const ValTy &V) { |
2274 | return m_NSWSub(m_ZeroInt(), V); |
2275 | } |
2276 | |
2277 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. |
2278 | template <typename ValTy> |
2279 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> |
2280 | m_Not(const ValTy &V) { |
2281 | return m_c_Xor(V, m_AllOnes()); |
2282 | } |
2283 | |
2284 | /// Matches an SMin with LHS and RHS in either order. |
2285 | template <typename LHS, typename RHS> |
2286 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> |
2287 | m_c_SMin(const LHS &L, const RHS &R) { |
2288 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); |
2289 | } |
2290 | /// Matches an SMax with LHS and RHS in either order. |
2291 | template <typename LHS, typename RHS> |
2292 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> |
2293 | m_c_SMax(const LHS &L, const RHS &R) { |
2294 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); |
2295 | } |
2296 | /// Matches a UMin with LHS and RHS in either order. |
2297 | template <typename LHS, typename RHS> |
2298 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> |
2299 | m_c_UMin(const LHS &L, const RHS &R) { |
2300 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); |
2301 | } |
2302 | /// Matches a UMax with LHS and RHS in either order. |
2303 | template <typename LHS, typename RHS> |
2304 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> |
2305 | m_c_UMax(const LHS &L, const RHS &R) { |
2306 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); |
2307 | } |
2308 | |
2309 | template <typename LHS, typename RHS> |
2310 | inline match_combine_or< |
2311 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>, |
2312 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>, |
2313 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>, |
2314 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>> |
2315 | m_c_MaxOrMin(const LHS &L, const RHS &R) { |
2316 | return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)), |
2317 | m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R))); |
2318 | } |
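// Illustrative use of m_c_MaxOrMin (a sketch; `V`, `X`, and `Y` are
// placeholders): accepts any signed or unsigned max/min of X and Y that
// MaxMin_match recognizes, with the operands in either order.
//
//   Value *X, *Y;
//   if (match(V, m_c_MaxOrMin(m_Value(X), m_Value(Y)))) {
//     // V is a smax/smin/umax/umin of X and Y.
//   }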
2319 | |
2320 | /// Matches FAdd with LHS and RHS in either order. |
2321 | template <typename LHS, typename RHS> |
2322 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> |
2323 | m_c_FAdd(const LHS &L, const RHS &R) { |
2324 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); |
2325 | } |
2326 | |
2327 | /// Matches FMul with LHS and RHS in either order. |
2328 | template <typename LHS, typename RHS> |
2329 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> |
2330 | m_c_FMul(const LHS &L, const RHS &R) { |
2331 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); |
2332 | } |
2333 | |
2334 | template <typename Opnd_t> struct Signum_match { |
2335 | Opnd_t Val; |
2336 | Signum_match(const Opnd_t &V) : Val(V) {} |
2337 | |
2338 | template <typename OpTy> bool match(OpTy *V) { |
2339 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); |
2340 | if (TypeSize == 0) |
2341 | return false; |
2342 | |
2343 | unsigned ShiftWidth = TypeSize - 1; |
2344 | Value *OpL = nullptr, *OpR = nullptr; |
2345 | |
2346 | // This is the representation of signum we match: |
2347 | // |
2348 | // signum(x) == (x >> 63) | (-x >>u 63) |
2349 | // |
2350 | // An i1 value is its own signum, so it's correct to match |
2351 | // |
2352 | // signum(x) == (x >> 0) | (-x >>u 0) |
2353 | // |
2354 | // for i1 values. |
2355 | |
2356 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); |
2357 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); |
2358 | auto Signum = m_Or(LHS, RHS); |
2359 | |
2360 | return Signum.match(V) && OpL == OpR && Val.match(OpL); |
2361 | } |
2362 | }; |
2363 | |
2364 | /// Matches a signum pattern. |
2365 | /// |
2366 | /// signum(x) = |
2367 | /// x > 0 -> 1 |
2368 | /// x == 0 -> 0 |
2369 | /// x < 0 -> -1 |
2370 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { |
2371 | return Signum_match<Val_t>(V); |
2372 | } |
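// Illustrative use of m_Signum (a sketch; `V` and `X` are placeholders):
//
//   Value *X;
//   // For i32, matches "(X >> 31) | (-X >>u 31)", which evaluates to
//   // -1, 0, or 1 according to the sign of X.
//   if (match(V, m_Signum(m_Value(X)))) {
//     // V computes signum(X).
//   }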
2373 | |
2374 | template <int Ind, typename Opnd_t> struct ExtractValue_match { |
2375 | Opnd_t Val; |
2376 | ExtractValue_match(const Opnd_t &V) : Val(V) {} |
2377 | |
2378 | template <typename OpTy> bool match(OpTy *V) { |
2379 | if (auto *I = dyn_cast<ExtractValueInst>(V)) { |
2380 | // If Ind is -1, don't inspect indices |
2381 | if (Ind != -1 && |
2382 | !(I->getNumIndices() == 1 && I->getIndices()[0] == (unsigned)Ind)) |
2383 | return false; |
2384 | return Val.match(I->getAggregateOperand()); |
2385 | } |
2386 | return false; |
2387 | } |
2388 | }; |
2389 | |
2390 | /// Match a single index ExtractValue instruction. |
2391 | /// For example m_ExtractValue<1>(...) |
2392 | template <int Ind, typename Val_t> |
2393 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { |
2394 | return ExtractValue_match<Ind, Val_t>(V); |
2395 | } |
2396 | |
2397 | /// Match an ExtractValue instruction with any index. |
2398 | /// For example m_ExtractValue(...) |
2399 | template <typename Val_t> |
2400 | inline ExtractValue_match<-1, Val_t> m_ExtractValue(const Val_t &V) { |
2401 | return ExtractValue_match<-1, Val_t>(V); |
2402 | } |
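// Illustrative use of m_ExtractValue (a sketch; names are placeholders):
// peeking through the {result, overflow} aggregate of an overflow intrinsic.
//
//   Value *A, *B;
//   // Matches "extractvalue %agg, 1" where %agg = llvm.uadd.with.overflow(A, B).
//   if (match(V, m_ExtractValue<1>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
//                    m_Value(A), m_Value(B))))) {
//     // V is the overflow bit of A + B.
//   }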
2403 | |
2404 | /// Matcher for a single index InsertValue instruction. |
2405 | template <int Ind, typename T0, typename T1> struct InsertValue_match { |
2406 | T0 Op0; |
2407 | T1 Op1; |
2408 | |
2409 | InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {} |
2410 | |
2411 | template <typename OpTy> bool match(OpTy *V) { |
2412 | if (auto *I = dyn_cast<InsertValueInst>(V)) { |
2413 | return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) && |
2414 | I->getNumIndices() == 1 && Ind == I->getIndices()[0]; |
2415 | } |
2416 | return false; |
2417 | } |
2418 | }; |
2419 | |
2420 | /// Matches a single index InsertValue instruction. |
2421 | template <int Ind, typename Val_t, typename Elt_t> |
2422 | inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val, |
2423 | const Elt_t &Elt) { |
2424 | return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt); |
2425 | } |
2426 | |
2427 | /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or |
2428 | /// the constant expression |
2429 | /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1)`
2430 | /// under the right conditions determined by DataLayout. |
2431 | struct VScaleVal_match { |
2432 | const DataLayout &DL; |
2433 | VScaleVal_match(const DataLayout &DL) : DL(DL) {} |
2434 | |
2435 | template <typename ITy> bool match(ITy *V) { |
2436 | if (m_Intrinsic<Intrinsic::vscale>().match(V)) |
2437 | return true; |
2438 | |
2439 | Value *Ptr; |
2440 | if (m_PtrToInt(m_Value(Ptr)).match(V)) { |
2441 | if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { |
2442 | auto *DerefTy = GEP->getSourceElementType(); |
2443 | if (GEP->getNumIndices() == 1 && isa<ScalableVectorType>(DerefTy) && |
2444 | m_Zero().match(GEP->getPointerOperand()) && |
2445 | m_SpecificInt(1).match(GEP->idx_begin()->get()) && |
2446 | DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) |
2447 | return true; |
2448 | } |
2449 | } |
2450 | |
2451 | return false; |
2452 | } |
2453 | }; |
2454 | |
2455 | inline VScaleVal_match m_VScale(const DataLayout &DL) { |
2456 | return VScaleVal_match(DL); |
2457 | } |
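// Illustrative use of m_VScale (a sketch; `V` is a placeholder and `DL` is
// the module's DataLayout):
//
//   if (match(V, m_VScale(DL))) {
//     // V is llvm.vscale() or the equivalent ptrtoint-of-gep constant
//     // expression described above.
//   }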
2458 | |
2459 | template <typename LHS, typename RHS, unsigned Opcode> |
2460 | struct LogicalOp_match { |
2461 | LHS L; |
2462 | RHS R; |
2463 | |
2464 | LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {} |
2465 | |
2466 | template <typename T> bool match(T *V) { |
2467 | if (auto *I = dyn_cast<Instruction>(V)) { |
2468 | if (!I->getType()->isIntOrIntVectorTy(1)) |
2469 | return false; |
2470 | |
2471 | if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) && |
2472 | R.match(I->getOperand(1))) |
2473 | return true; |
2474 | |
2475 | if (auto *SI = dyn_cast<SelectInst>(I)) { |
2476 | if (Opcode == Instruction::And) { |
2477 | if (const auto *C = dyn_cast<Constant>(SI->getFalseValue())) |
2478 | if (C->isNullValue() && L.match(SI->getCondition()) && |
2479 | R.match(SI->getTrueValue())) |
2480 | return true; |
2481 | } else { |
2482 |         assert(Opcode == Instruction::Or);
2483 | if (const auto *C = dyn_cast<Constant>(SI->getTrueValue())) |
2484 | if (C->isOneValue() && L.match(SI->getCondition()) && |
2485 | R.match(SI->getFalseValue())) |
2486 | return true; |
2487 | } |
2488 | } |
2489 | } |
2490 | |
2491 | return false; |
2492 | } |
2493 | }; |
2494 | |
2495 | /// Matches L && R either in the form of L & R or L ? R : false. |
2496 | /// Note that the latter form is poison-blocking. |
2497 | template <typename LHS, typename RHS> |
2498 | inline LogicalOp_match<LHS, RHS, Instruction::And> |
2499 | m_LogicalAnd(const LHS &L, const RHS &R) { |
2500 | return LogicalOp_match<LHS, RHS, Instruction::And>(L, R); |
2501 | } |
2502 | |
2503 | /// Matches L && R where L and R are arbitrary values. |
2504 | inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); } |
2505 | |
2506 | /// Matches L || R either in the form of L | R or L ? true : R. |
2507 | /// Note that the latter form is poison-blocking. |
2508 | template <typename LHS, typename RHS> |
2509 | inline LogicalOp_match<LHS, RHS, Instruction::Or> |
2510 | m_LogicalOr(const LHS &L, const RHS &R) { |
2511 | return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R); |
2512 | } |
2513 | |
2514 | /// Matches L || R where L and R are arbitrary values. |
2515 | inline auto m_LogicalOr() { |
2516 | return m_LogicalOr(m_Value(), m_Value()); |
2517 | } |
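// Illustrative use of the logical matchers (a sketch; `V`, `A`, and `B` are
// placeholders): both the bitwise i1 form and the poison-blocking select
// form are accepted.
//
//   Value *A, *B;
//   // Matches "and i1 A, B" as well as "select i1 A, i1 B, i1 false".
//   if (match(V, m_LogicalAnd(m_Value(A), m_Value(B)))) {
//     // A && B; in the select form, poison in B does not propagate when
//     // A is false.
//   }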
2518 | |
2519 | } // end namespace PatternMatch |
2520 | } // end namespace llvm |
2521 | |
2522 | #endif // LLVM_IR_PATTERNMATCH_H |