Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1335, column 12
Called C++ object pointer is null
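
This warning means the analyzer found at least one feasible execution path on which a member function is invoked through a null object pointer at the reported location. A minimal sketch of this defect class is shown below; the type and function names are hypothetical illustrations, not the actual code at SelectionDAGNodes.h:1335.

// Hypothetical illustration of "Called C++ object pointer is null";
// these names are not taken from SelectionDAGNodes.h.
struct Node {
  bool isVolatile() const { return true; }
};

bool mayDereferenceNull(Node *N, bool HaveNode) {
  Node *Ptr = HaveNode ? N : nullptr;
  // On the path where HaveNode is false, Ptr is null here, so the member
  // call below goes through a null object pointer -- the pattern the
  // analyzer reports at the flagged line and column.
  return Ptr->isVolatile();
}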

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46#include "llvm/CodeGen/TargetLowering.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSubtargetInfo.h"
49#include "llvm/CodeGen/ValueTypes.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CodeGen.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/KnownBits.h"
64#include "llvm/Support/MachineValueType.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Target/TargetMachine.h"
68#include "llvm/Target/TargetOptions.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
99static cl::opt<std::string>
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
121static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
125static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
130static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
134
135static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
169 /// This records all nodes attempted to add to the worklist since we
170 /// considered a new worklist entry. As we do not add duplicate nodes
171 /// in the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its user to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 }
255
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
259 }
260
261 /// Add to the worklist making sure its instance is at the back (next to be
262 /// processed.)
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
266
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
271
272 ConsiderForPruning(N);
273
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
276 }
277
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
283
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
287
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
291 }
292
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
299
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
303 }
304
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
310 }
311
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314 private:
315 unsigned MaximumLegalStoreInBits;
316
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
331
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
334
335 CommitTargetLoweringOpt(TLO);
336 return true;
337 }
338
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
346
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 }
351
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
362
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
370 ///
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
387
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
392
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
399 //
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitTRUNCATE(SDNode *N);
466 SDValue visitBITCAST(SDNode *N);
467 SDValue visitFREEZE(SDNode *N);
468 SDValue visitBUILD_PAIR(SDNode *N);
469 SDValue visitFADD(SDNode *N);
470 SDValue visitSTRICT_FADD(SDNode *N);
471 SDValue visitFSUB(SDNode *N);
472 SDValue visitFMUL(SDNode *N);
473 SDValue visitFMA(SDNode *N);
474 SDValue visitFDIV(SDNode *N);
475 SDValue visitFREM(SDNode *N);
476 SDValue visitFSQRT(SDNode *N);
477 SDValue visitFCOPYSIGN(SDNode *N);
478 SDValue visitFPOW(SDNode *N);
479 SDValue visitSINT_TO_FP(SDNode *N);
480 SDValue visitUINT_TO_FP(SDNode *N);
481 SDValue visitFP_TO_SINT(SDNode *N);
482 SDValue visitFP_TO_UINT(SDNode *N);
483 SDValue visitFP_ROUND(SDNode *N);
484 SDValue visitFP_EXTEND(SDNode *N);
485 SDValue visitFNEG(SDNode *N);
486 SDValue visitFABS(SDNode *N);
487 SDValue visitFCEIL(SDNode *N);
488 SDValue visitFTRUNC(SDNode *N);
489 SDValue visitFFLOOR(SDNode *N);
490 SDValue visitFMINNUM(SDNode *N);
491 SDValue visitFMAXNUM(SDNode *N);
492 SDValue visitFMINIMUM(SDNode *N);
493 SDValue visitFMAXIMUM(SDNode *N);
494 SDValue visitBRCOND(SDNode *N);
495 SDValue visitBR_CC(SDNode *N);
496 SDValue visitLOAD(SDNode *N);
497
498 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
499 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
500
501 SDValue visitSTORE(SDNode *N);
502 SDValue visitLIFETIME_END(SDNode *N);
503 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
504 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
505 SDValue visitBUILD_VECTOR(SDNode *N);
506 SDValue visitCONCAT_VECTORS(SDNode *N);
507 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
508 SDValue visitVECTOR_SHUFFLE(SDNode *N);
509 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
510 SDValue visitINSERT_SUBVECTOR(SDNode *N);
511 SDValue visitMLOAD(SDNode *N);
512 SDValue visitMSTORE(SDNode *N);
513 SDValue visitMGATHER(SDNode *N);
514 SDValue visitMSCATTER(SDNode *N);
515 SDValue visitFP_TO_FP16(SDNode *N);
516 SDValue visitFP16_TO_FP(SDNode *N);
517 SDValue visitVECREDUCE(SDNode *N);
518
519 SDValue visitFADDForFMACombine(SDNode *N);
520 SDValue visitFSUBForFMACombine(SDNode *N);
521 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
522
523 SDValue XformToShuffleWithZero(SDNode *N);
524 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
525 const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1);
529 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530 SDValue N1, SDNodeFlags Flags);
531
532 SDValue visitShiftByConstant(SDNode *N);
533
534 SDValue foldSelectOfConstants(SDNode *N);
535 SDValue foldVSelectOfConstants(SDNode *N);
536 SDValue foldBinOpIntoSelect(SDNode *BO);
537 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541 SDValue N2, SDValue N3, ISD::CondCode CC,
542 bool NotExtCompare = false);
543 SDValue convertSelectOfFPConstantsToLoadOffset(
544 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545 ISD::CondCode CC);
546 SDValue foldSignChangeInBitcast(SDNode *N);
547 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548 SDValue N2, SDValue N3, ISD::CondCode CC);
549 SDValue foldSelectOfBinops(SDNode *N);
550 SDValue foldSextSetcc(SDNode *N);
551 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
552 const SDLoc &DL);
553 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554 SDValue unfoldMaskedMerge(SDNode *N);
555 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557 const SDLoc &DL, bool foldBooleans);
558 SDValue rebuildSetCC(SDValue N);
559
560 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561 SDValue &CC, bool MatchStrict = false) const;
562 bool isOneUseSetCC(SDValue N) const;
563
564 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
565 unsigned HiOp);
566 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567 SDValue CombineExtLoad(SDNode *N);
568 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569 SDValue combineRepeatedFPDivisors(SDNode *N);
570 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572 SDValue BuildSDIV(SDNode *N);
573 SDValue BuildSDIVPow2(SDNode *N);
574 SDValue BuildUDIV(SDNode *N);
575 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
576 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
577 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
579 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
580 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
582 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
583 SDNodeFlags Flags, bool Reciprocal);
584 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
585 bool DemandHighBits = true);
586 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
587 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
588 SDValue InnerPos, SDValue InnerNeg,
589 unsigned PosOpcode, unsigned NegOpcode,
590 const SDLoc &DL);
591 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
592 SDValue InnerPos, SDValue InnerNeg,
593 unsigned PosOpcode, unsigned NegOpcode,
594 const SDLoc &DL);
595 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
596 SDValue MatchLoadCombine(SDNode *N);
597 SDValue mergeTruncStores(StoreSDNode *N);
598 SDValue ReduceLoadWidth(SDNode *N);
599 SDValue ReduceLoadOpStoreWidth(SDNode *N);
600 SDValue splitMergedValStore(StoreSDNode *ST);
601 SDValue TransformFPLoadStorePair(SDNode *N);
602 SDValue convertBuildVecZextToZext(SDNode *N);
603 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
604 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
605 SDValue reduceBuildVecToShuffle(SDNode *N);
606 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
607 ArrayRef<int> VectorMask, SDValue VecIn1,
608 SDValue VecIn2, unsigned LeftIdx,
609 bool DidSplitVec);
610 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
611
612 /// Walk up chain skipping non-aliasing memory nodes,
613 /// looking for aliasing nodes and adding them to the Aliases vector.
614 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
615 SmallVectorImpl<SDValue> &Aliases);
616
617 /// Return true if there is any possibility that the two addresses overlap.
618 bool isAlias(SDNode *Op0, SDNode *Op1) const;
619
620 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
621 /// chain (aliasing node.)
622 SDValue FindBetterChain(SDNode *N, SDValue Chain);
623
624 /// Try to replace a store and any possibly adjacent stores on
625 /// consecutive chains with better chains. Return true only if St is
626 /// replaced.
627 ///
628 /// Notice that other chains may still be replaced even if the function
629 /// returns false.
630 bool findBetterNeighborChains(StoreSDNode *St);
631
632 // Helper for findBetterNeighborChains. Walk up the store chain and add additional
633 // chained stores that do not overlap and can be parallelized.
634 bool parallelizeChainedStores(StoreSDNode *St);
635
636 /// Holds a pointer to an LSBaseSDNode as well as information on where it
637 /// is located in a sequence of memory operations connected by a chain.
638 struct MemOpLink {
639 // Ptr to the mem node.
640 LSBaseSDNode *MemNode;
641
642 // Offset from the base ptr.
643 int64_t OffsetFromBase;
644
645 MemOpLink(LSBaseSDNode *N, int64_t Offset)
646 : MemNode(N), OffsetFromBase(Offset) {}
647 };
648
649 // Classify the origin of a stored value.
650 enum class StoreSource { Unknown, Constant, Extract, Load };
651 StoreSource getStoreSource(SDValue StoreVal) {
652 switch (StoreVal.getOpcode()) {
653 case ISD::Constant:
654 case ISD::ConstantFP:
655 return StoreSource::Constant;
656 case ISD::EXTRACT_VECTOR_ELT:
657 case ISD::EXTRACT_SUBVECTOR:
658 return StoreSource::Extract;
659 case ISD::LOAD:
660 return StoreSource::Load;
661 default:
662 return StoreSource::Unknown;
663 }
664 }
665
666 /// This is a helper function for visitMUL to check the profitability
667 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
668 /// MulNode is the original multiply, AddNode is (add x, c1),
669 /// and ConstNode is c2.
670 bool isMulAddWithConstProfitable(SDNode *MulNode,
671 SDValue &AddNode,
672 SDValue &ConstNode);
673
674 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
675 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
676 /// the type of the loaded value to be extended.
677 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
678 EVT LoadResultTy, EVT &ExtVT);
679
680 /// Helper function to calculate whether the given Load/Store can have its
681 /// width reduced to ExtVT.
682 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
683 EVT &MemVT, unsigned ShAmt = 0);
684
685 /// Used by BackwardsPropagateMask to find suitable loads.
686 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
687 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
688 ConstantSDNode *Mask, SDNode *&NodeToMask);
689 /// Attempt to propagate a given AND node back to load leaves so that they
690 /// can be combined into narrow loads.
691 bool BackwardsPropagateMask(SDNode *N);
692
693 /// Helper function for mergeConsecutiveStores which merges the component
694 /// store chains.
695 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
696 unsigned NumStores);
697
698 /// This is a helper function for mergeConsecutiveStores. When the source
699 /// elements of the consecutive stores are all constants or all extracted
700 /// vector elements, try to merge them into one larger store introducing
701 /// bitcasts if necessary. \return True if a merged store was created.
702 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
703 EVT MemVT, unsigned NumStores,
704 bool IsConstantSrc, bool UseVector,
705 bool UseTrunc);
706
707 /// This is a helper function for mergeConsecutiveStores. Stores that
708 /// potentially may be merged with St are placed in StoreNodes. RootNode is
709 /// a chain predecessor to all store candidates.
710 void getStoreMergeCandidates(StoreSDNode *St,
711 SmallVectorImpl<MemOpLink> &StoreNodes,
712 SDNode *&Root);
713
714 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
715 /// have indirect dependency through their operands. RootNode is the
716 /// predecessor to all stores calculated by getStoreMergeCandidates and is
717 /// used to prune the dependency check. \return True if safe to merge.
718 bool checkMergeStoreCandidatesForDependencies(
719 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
720 SDNode *RootNode);
721
722 /// This is a helper function for mergeConsecutiveStores. Given a list of
723 /// store candidates, find the first N that are consecutive in memory.
724 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
725 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
726 int64_t ElementSizeBytes) const;
727
728 /// This is a helper function for mergeConsecutiveStores. It is used for
729 /// store chains that are composed entirely of constant values.
730 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
731 unsigned NumConsecutiveStores,
732 EVT MemVT, SDNode *Root, bool AllowVectors);
733
734 /// This is a helper function for mergeConsecutiveStores. It is used for
735 /// store chains that are composed entirely of extracted vector elements.
736 /// When extracting multiple vector elements, try to store them in one
737 /// vector store rather than a sequence of scalar stores.
738 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
739 unsigned NumConsecutiveStores, EVT MemVT,
740 SDNode *Root);
741
742 /// This is a helper function for mergeConsecutiveStores. It is used for
743 /// store chains that are composed entirely of loaded values.
744 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
745 unsigned NumConsecutiveStores, EVT MemVT,
746 SDNode *Root, bool AllowVectors,
747 bool IsNonTemporalStore, bool IsNonTemporalLoad);
748
749 /// Merge consecutive store operations into a wide store.
750 /// This optimization uses wide integers or vectors when possible.
751 /// \return true if stores were merged.
752 bool mergeConsecutiveStores(StoreSDNode *St);
753
754 /// Try to transform a truncation where C is a constant:
755 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
756 ///
757 /// \p N needs to be a truncation and its first operand an AND. Other
758 /// requirements are checked by the function (e.g. that trunc is
759 /// single-use) and if missed an empty SDValue is returned.
760 SDValue distributeTruncateThroughAnd(SDNode *N);
761
762 /// Helper function to determine whether the target supports operation
763 /// given by \p Opcode for type \p VT, that is, whether the operation
764 /// is legal or custom before legalizing operations, and whether is
765 /// legal (but not custom) after legalization.
766 bool hasOperation(unsigned Opcode, EVT VT) {
767 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768 }
769
770 public:
771 /// Runs the dag combiner on all nodes in the work list
772 void Run(CombineLevel AtLevel);
773
774 SelectionDAG &getDAG() const { return DAG; }
775
776 /// Returns a type large enough to hold any valid shift amount - before type
777 /// legalization these can be huge.
778 EVT getShiftAmountTy(EVT LHSTy) {
779 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
780 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781 }
782
783 /// This method returns true if we are running before type legalization or
784 /// if the specified VT is legal.
785 bool isTypeLegal(const EVT &VT) {
786 if (!LegalTypes) return true;
787 return TLI.isTypeLegal(VT);
788 }
789
790 /// Convenience wrapper around TargetLowering::getSetCCResultType
791 EVT getSetCCResultType(EVT VT) const {
792 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793 }
794
795 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
796 SDValue OrigLoad, SDValue ExtLoad,
797 ISD::NodeType ExtType);
798 };
799
800/// This class is a DAGUpdateListener that removes any deleted
801/// nodes from the worklist.
802class WorklistRemover : public SelectionDAG::DAGUpdateListener {
803 DAGCombiner &DC;
804
805public:
806 explicit WorklistRemover(DAGCombiner &dc)
807 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
808
809 void NodeDeleted(SDNode *N, SDNode *E) override {
810 DC.removeFromWorklist(N);
811 }
812};
813
814class WorklistInserter : public SelectionDAG::DAGUpdateListener {
815 DAGCombiner &DC;
816
817public:
818 explicit WorklistInserter(DAGCombiner &dc)
819 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
820
821 // FIXME: Ideally we could add N to the worklist, but this causes exponential
822 // compile time costs in large DAGs, e.g. Halide.
823 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824};
825
826} // end anonymous namespace
827
828//===----------------------------------------------------------------------===//
829// TargetLowering::DAGCombinerInfo implementation
830//===----------------------------------------------------------------------===//
831
832void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
833 ((DAGCombiner*)DC)->AddToWorklist(N);
834}
835
836SDValue TargetLowering::DAGCombinerInfo::
837CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
838 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839}
840
841SDValue TargetLowering::DAGCombinerInfo::
842CombineTo(SDNode *N, SDValue Res, bool AddTo) {
843 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844}
845
846SDValue TargetLowering::DAGCombinerInfo::
847CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
848 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849}
850
851bool TargetLowering::DAGCombinerInfo::
852recursivelyDeleteUnusedNodes(SDNode *N) {
853 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854}
855
856void TargetLowering::DAGCombinerInfo::
857CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
858 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859}
860
861//===----------------------------------------------------------------------===//
862// Helper Functions
863//===----------------------------------------------------------------------===//
864
865void DAGCombiner::deleteAndRecombine(SDNode *N) {
866 removeFromWorklist(N);
867
868 // If the operands of this node are only used by the node, they will now be
869 // dead. Make sure to re-visit them and recursively delete dead nodes.
870 for (const SDValue &Op : N->ops())
871 // For an operand generating multiple values, one of the values may
872 // become dead allowing further simplification (e.g. split index
873 // arithmetic from an indexed load).
874 if (Op->hasOneUse() || Op->getNumValues() > 1)
875 AddToWorklist(Op.getNode());
876
877 DAG.DeleteNode(N);
878}
879
880// APInts must be the same size for most operations, this helper
881// function zero extends the shorter of the pair so that they match.
882// We provide an Offset so that we can create bitwidths that won't overflow.
883static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
884 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
885 LHS = LHS.zextOrSelf(Bits);
886 RHS = RHS.zextOrSelf(Bits);
887}
888
889// Return true if this node is a setcc, or is a select_cc
890// that selects between the target values used for true and false, making it
891// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
892// the appropriate nodes based on the type of node we are checking. This
893// simplifies life a bit for the callers.
894bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
895 SDValue &CC, bool MatchStrict) const {
896 if (N.getOpcode() == ISD::SETCC) {
897 LHS = N.getOperand(0);
898 RHS = N.getOperand(1);
899 CC = N.getOperand(2);
900 return true;
901 }
902
903 if (MatchStrict &&
904 (N.getOpcode() == ISD::STRICT_FSETCC ||
905 N.getOpcode() == ISD::STRICT_FSETCCS)) {
906 LHS = N.getOperand(1);
907 RHS = N.getOperand(2);
908 CC = N.getOperand(3);
909 return true;
910 }
911
912 if (N.getOpcode() != ISD::SELECT_CC ||
913 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
914 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
915 return false;
916
917 if (TLI.getBooleanContents(N.getValueType()) ==
918 TargetLowering::UndefinedBooleanContent)
919 return false;
920
921 LHS = N.getOperand(0);
922 RHS = N.getOperand(1);
923 CC = N.getOperand(4);
924 return true;
925}
926
927/// Return true if this is a SetCC-equivalent operation with only one use.
928/// If this is true, it allows the users to invert the operation for free when
929/// it is profitable to do so.
930bool DAGCombiner::isOneUseSetCC(SDValue N) const {
931 SDValue N0, N1, N2;
932 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
933 return true;
934 return false;
935}
936
937static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938 if (!ScalarTy.isSimple())
939 return false;
940
941 uint64_t MaskForTy = 0ULL;
942 switch (ScalarTy.getSimpleVT().SimpleTy) {
943 case MVT::i8:
944 MaskForTy = 0xFFULL;
945 break;
946 case MVT::i16:
947 MaskForTy = 0xFFFFULL;
948 break;
949 case MVT::i32:
950 MaskForTy = 0xFFFFFFFFULL;
951 break;
952 default:
953 return false;
954 break;
955 }
956
957 APInt Val;
958 if (ISD::isConstantSplatVector(N, Val))
959 return Val.getLimitedValue() == MaskForTy;
960
961 return false;
962}
963
964// Determines if it is a constant integer or a splat/build vector of constant
965// integers (and undefs).
966// Do not permit build vector implicit truncation.
967static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969 return !(Const->isOpaque() && NoOpaques);
970 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
971 return false;
972 unsigned BitWidth = N.getScalarValueSizeInBits();
973 for (const SDValue &Op : N->op_values()) {
974 if (Op.isUndef())
975 continue;
976 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978 (Const->isOpaque() && NoOpaques))
979 return false;
980 }
981 return true;
982}
983
984// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
985// undef's.
986static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987 if (V.getOpcode() != ISD::BUILD_VECTOR)
988 return false;
989 return isConstantOrConstantVector(V, NoOpaques) ||
990 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991}
992
993// Determine if this is an indexed load with an opaque target constant index.
994static bool canSplitIdx(LoadSDNode *LD) {
995 return MaySplitLoadIndex &&
996 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998}
999
1000bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001 const SDLoc &DL,
1002 SDValue N0,
1003 SDValue N1) {
1004 // Currently this only tries to ensure we don't undo the GEP splits done by
1005 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006 // we check if the following transformation would be problematic:
1007 // (load/store (add, (add, x, offset1), offset2)) ->
1008 // (load/store (add, x, offset1+offset2)).
1009
1010 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011 return false;
1012
1013 if (N0.hasOneUse())
1014 return false;
1015
1016 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018 if (!C1 || !C2)
1019 return false;
1020
1021 const APInt &C1APIntVal = C1->getAPIntValue();
1022 const APInt &C2APIntVal = C2->getAPIntValue();
1023 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024 return false;
1025
1026 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027 if (CombinedValueIntVal.getBitWidth() > 64)
1028 return false;
1029 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030
1031 for (SDNode *Node : N0->uses()) {
1032 auto LoadStore = dyn_cast<MemSDNode>(Node);
1033 if (LoadStore) {
1034 // Is x[offset2] already not a legal addressing mode? If so then
1035 // reassociating the constants breaks nothing (we test offset2 because
1036 // that's the one we hope to fold into the load or store).
1037 TargetLoweringBase::AddrMode AM;
1038 AM.HasBaseReg = true;
1039 AM.BaseOffs = C2APIntVal.getSExtValue();
1040 EVT VT = LoadStore->getMemoryVT();
1041 unsigned AS = LoadStore->getAddressSpace();
1042 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044 continue;
1045
1046 // Would x[offset1+offset2] still be a legal addressing mode?
1047 AM.BaseOffs = CombinedValue;
1048 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049 return true;
1050 }
1051 }
1052
1053 return false;
1054}
1055
1056// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059 SDValue N0, SDValue N1) {
1060 EVT VT = N0.getValueType();
1061
1062 if (N0.getOpcode() != Opc)
1063 return SDValue();
1064
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068 if (SDValue OpNode =
1069 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071 return SDValue();
1072 }
1073 if (N0.hasOneUse()) {
1074 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075 // iff (op x, c1) has one use
1076 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077 if (!OpNode.getNode())
1078 return SDValue();
1079 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080 }
1081 }
1082 return SDValue();
1083}
1084
1085// Try to reassociate commutative binops.
1086SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087 SDValue N1, SDNodeFlags Flags) {
1088 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089
1090 // Floating-point reassociation is not allowed without loose FP math.
1091 if (N0.getValueType().isFloatingPoint() ||
1092 N1.getValueType().isFloatingPoint())
1093 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094 return SDValue();
1095
1096 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097 return Combined;
1098 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099 return Combined;
1100 return SDValue();
1101}
1102
1103SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104 bool AddTo) {
1105 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106 ++NodesCombined;
1107 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1108 To[0].getNode()->dump(&DAG);
1109 dbgs() << " and " << NumTo - 1 << " other values\n");
1110 for (unsigned i = 0, e = NumTo; i != e; ++i)
1111 assert((!To[i].getNode() ||
1112 N->getValueType(i) == To[i].getValueType()) &&
1113 "Cannot combine value to value of different type!");
1114
1115 WorklistRemover DeadNodes(*this);
1116 DAG.ReplaceAllUsesWith(N, To);
1117 if (AddTo) {
1118 // Push the new nodes and any users onto the worklist
1119 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120 if (To[i].getNode()) {
1121 AddToWorklist(To[i].getNode());
1122 AddUsersToWorklist(To[i].getNode());
1123 }
1124 }
1125 }
1126
1127 // Finally, if the node is now dead, remove it from the graph. The node
1128 // may not be dead if the replacement process recursively simplified to
1129 // something else needing this node.
1130 if (N->use_empty())
1131 deleteAndRecombine(N);
1132 return SDValue(N, 0);
1133}
1134
1135void DAGCombiner::
1136CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137 // Replace the old value with the new one.
1138 ++NodesCombined;
1139 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1140 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1141 dbgs() << '\n');
1142
1143 // Replace all uses. If any nodes become isomorphic to other nodes and
1144 // are deleted, make sure to remove them from our worklist.
1145 WorklistRemover DeadNodes(*this);
1146 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147
1148 // Push the new node and any (possibly new) users onto the worklist.
1149 AddToWorklistWithUsers(TLO.New.getNode());
1150
1151 // Finally, if the node is now dead, remove it from the graph. The node
1152 // may not be dead if the replacement process recursively simplified to
1153 // something else needing this node.
1154 if (TLO.Old.getNode()->use_empty())
1155 deleteAndRecombine(TLO.Old.getNode());
1156}
1157
1158/// Check the specified integer node value to see if it can be simplified or if
1159/// things it uses can be simplified by bit propagation. If so, return true.
1160bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161 const APInt &DemandedElts,
1162 bool AssumeSingleUse) {
1163 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164 KnownBits Known;
1165 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166 AssumeSingleUse))
1167 return false;
1168
1169 // Revisit the node.
1170 AddToWorklist(Op.getNode());
1171
1172 CommitTargetLoweringOpt(TLO);
1173 return true;
1174}
1175
1176/// Check the specified vector node value to see if it can be simplified or
1177/// if things it uses can be simplified as it only uses some of the elements.
1178/// If so, return true.
1179bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180 const APInt &DemandedElts,
1181 bool AssumeSingleUse) {
1182 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183 APInt KnownUndef, KnownZero;
1184 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185 TLO, 0, AssumeSingleUse))
1186 return false;
1187
1188 // Revisit the node.
1189 AddToWorklist(Op.getNode());
1190
1191 CommitTargetLoweringOpt(TLO);
1192 return true;
1193}
1194
1195void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196 SDLoc DL(Load);
1197 EVT VT = Load->getValueType(0);
1198 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199
1200 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202 WorklistRemover DeadNodes(*this);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205 deleteAndRecombine(Load);
1206 AddToWorklist(Trunc.getNode());
1207}
1208
1209SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210 Replace = false;
1211 SDLoc DL(Op);
1212 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213 LoadSDNode *LD = cast<LoadSDNode>(Op);
1214 EVT MemVT = LD->getMemoryVT();
1215 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216 : LD->getExtensionType();
1217 Replace = true;
1218 return DAG.getExtLoad(ExtType, DL, PVT,
1219 LD->getChain(), LD->getBasePtr(),
1220 MemVT, LD->getMemOperand());
1221 }
1222
1223 unsigned Opc = Op.getOpcode();
1224 switch (Opc) {
1225 default: break;
1226 case ISD::AssertSext:
1227 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229 break;
1230 case ISD::AssertZext:
1231 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233 break;
1234 case ISD::Constant: {
1235 unsigned ExtOpc =
1236 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237 return DAG.getNode(ExtOpc, DL, PVT, Op);
1238 }
1239 }
1240
1241 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242 return SDValue();
1243 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244}
1245
1246SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248 return SDValue();
1249 EVT OldVT = Op.getValueType();
1250 SDLoc DL(Op);
1251 bool Replace = false;
1252 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253 if (!NewOp.getNode())
1254 return SDValue();
1255 AddToWorklist(NewOp.getNode());
1256
1257 if (Replace)
1258 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260 DAG.getValueType(OldVT));
1261}
1262
1263SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264 EVT OldVT = Op.getValueType();
1265 SDLoc DL(Op);
1266 bool Replace = false;
1267 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268 if (!NewOp.getNode())
1269 return SDValue();
1270 AddToWorklist(NewOp.getNode());
1271
1272 if (Replace)
1273 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275}
1276
1277/// Promote the specified integer binary operation if the target indicates it is
1278/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279/// i32 since i16 instructions are longer.
1280SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281 if (!LegalOperations)
1282 return SDValue();
1283
1284 EVT VT = Op.getValueType();
1285 if (VT.isVector() || !VT.isInteger())
1286 return SDValue();
1287
1288 // If operation type is 'undesirable', e.g. i16 on x86, consider
1289 // promoting it.
1290 unsigned Opc = Op.getOpcode();
1291 if (TLI.isTypeDesirableForOp(Opc, VT))
1292 return SDValue();
1293
1294 EVT PVT = VT;
1295 // Consult target whether it is a good idea to promote this operation and
1296 // what's the right type to promote it to.
1297 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298 assert(PVT != VT && "Don't know what type to promote to!");
1299
1300 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301
1302 bool Replace0 = false;
1303 SDValue N0 = Op.getOperand(0);
1304 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305
1306 bool Replace1 = false;
1307 SDValue N1 = Op.getOperand(1);
1308 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309 SDLoc DL(Op);
1310
1311 SDValue RV =
1312 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313
1314 // We are always replacing N0/N1's use in N and only need additional
1315 // replacements if there are additional uses.
1316 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317 // (SDValue) here because the node may reference multiple values
1318 // (for example, the chain value of a load node).
1319 Replace0 &= !N0->hasOneUse();
1320 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321
1322 // Combine Op here so it is preserved past replacements.
1323 CombineTo(Op.getNode(), RV);
1324
1325 // If operands have a use ordering, make sure we deal with
1326 // predecessor first.
1327 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328 std::swap(N0, N1);
1329 std::swap(NN0, NN1);
1330 }
1331
1332 if (Replace0) {
1333 AddToWorklist(NN0.getNode());
1334 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335 }
1336 if (Replace1) {
1337 AddToWorklist(NN1.getNode());
1338 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339 }
1340 return Op;
1341 }
1342 return SDValue();
1343}
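
A standalone check (plain C++, independent of LLVM; the test values are arbitrary) of the arithmetic fact this promotion relies on for ADD (the other promoted ops behave the same way): the low 16 bits of a 32-bit sum do not depend on how the i16 operands were extended, so truncating the wide operation reproduces the narrow one.

#include <cassert>
#include <cstdint>

int main() {
  for (uint16_t a : {uint16_t(0), uint16_t(1), uint16_t(0x7FFF), uint16_t(0xFFFF)})
    for (uint16_t b : {uint16_t(0), uint16_t(2), uint16_t(0x8000), uint16_t(0xFFFF)}) {
      uint16_t narrow  = uint16_t(a + b);                                     // the original i16 add
      uint16_t viaZext = uint16_t(uint32_t(a) + uint32_t(b));                 // zero-extended operands
      uint16_t viaSext = uint16_t(int32_t(int16_t(a)) + int32_t(int16_t(b))); // sign-extended operands
      assert(narrow == viaZext && narrow == viaSext);
    }
  return 0;
}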
1344
1345/// Promote the specified integer shift operation if the target indicates it is
1346/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347/// i32 since i16 instructions are longer.
1348SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349 if (!LegalOperations)
1350 return SDValue();
1351
1352 EVT VT = Op.getValueType();
1353 if (VT.isVector() || !VT.isInteger())
1354 return SDValue();
1355
1356 // If operation type is 'undesirable', e.g. i16 on x86, consider
1357 // promoting it.
1358 unsigned Opc = Op.getOpcode();
1359 if (TLI.isTypeDesirableForOp(Opc, VT))
1360 return SDValue();
1361
1362 EVT PVT = VT;
1363 // Consult target whether it is a good idea to promote this operation and
1364 // what's the right type to promote it to.
1365 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366 assert(PVT != VT && "Don't know what type to promote to!");
1367
1368 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369
1370 bool Replace = false;
1371 SDValue N0 = Op.getOperand(0);
1372 SDValue N1 = Op.getOperand(1);
1373 if (Opc == ISD::SRA)
1374 N0 = SExtPromoteOperand(N0, PVT);
1375 else if (Opc == ISD::SRL)
1376 N0 = ZExtPromoteOperand(N0, PVT);
1377 else
1378 N0 = PromoteOperand(N0, PVT, Replace);
1379
1380 if (!N0.getNode())
1381 return SDValue();
1382
1383 SDLoc DL(Op);
1384 SDValue RV =
1385 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386
1387 if (Replace)
1388 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389
1390 // Deal with Op being deleted.
1391 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392 return RV;
1393 }
1394 return SDValue();
1395}
1396
1397SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398 if (!LegalOperations)
1399 return SDValue();
1400
1401 EVT VT = Op.getValueType();
1402 if (VT.isVector() || !VT.isInteger())
1403 return SDValue();
1404
1405 // If operation type is 'undesirable', e.g. i16 on x86, consider
1406 // promoting it.
1407 unsigned Opc = Op.getOpcode();
1408 if (TLI.isTypeDesirableForOp(Opc, VT))
1409 return SDValue();
1410
1411 EVT PVT = VT;
1412 // Consult target whether it is a good idea to promote this operation and
1413 // what's the right type to promote it to.
1414 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415 assert(PVT != VT && "Don't know what type to promote to!");
1416 // fold (aext (aext x)) -> (aext x)
1417 // fold (aext (zext x)) -> (zext x)
1418 // fold (aext (sext x)) -> (sext x)
1419 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421 }
1422 return SDValue();
1423}
1424
1425bool DAGCombiner::PromoteLoad(SDValue Op) {
1426 if (!LegalOperations)
1427 return false;
1428
1429 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430 return false;
1431
1432 EVT VT = Op.getValueType();
1433 if (VT.isVector() || !VT.isInteger())
1434 return false;
1435
1436 // If operation type is 'undesirable', e.g. i16 on x86, consider
1437 // promoting it.
1438 unsigned Opc = Op.getOpcode();
1439 if (TLI.isTypeDesirableForOp(Opc, VT))
1440 return false;
1441
1442 EVT PVT = VT;
1443 // Consult target whether it is a good idea to promote this operation and
1444 // what's the right type to promote it to.
1445 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446 assert(PVT != VT && "Don't know what type to promote to!");
1447
1448 SDLoc DL(Op);
1449 SDNode *N = Op.getNode();
1450 LoadSDNode *LD = cast<LoadSDNode>(N);
1451 EVT MemVT = LD->getMemoryVT();
1452 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453 : LD->getExtensionType();
1454 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455 LD->getChain(), LD->getBasePtr(),
1456 MemVT, LD->getMemOperand());
1457 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458
1459 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460 Result.getNode()->dump(&DAG); dbgs() << '\n');
1461 WorklistRemover DeadNodes(*this);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464 deleteAndRecombine(N);
1465 AddToWorklist(Result.getNode());
1466 return true;
1467 }
1468 return false;
1469}
1470
1471/// Recursively delete a node which has no uses and any operands for
1472/// which it is the only use.
1473///
1474/// Note that this both deletes the nodes and removes them from the worklist.
1475 /// It also adds any nodes that have had a user deleted to the worklist, as they
1476 /// may now have only one use and be subject to other combines.
1477bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478 if (!N->use_empty())
1479 return false;
1480
1481 SmallSetVector<SDNode *, 16> Nodes;
1482 Nodes.insert(N);
1483 do {
1484 N = Nodes.pop_back_val();
1485 if (!N)
1486 continue;
1487
1488 if (N->use_empty()) {
1489 for (const SDValue &ChildN : N->op_values())
1490 Nodes.insert(ChildN.getNode());
1491
1492 removeFromWorklist(N);
1493 DAG.DeleteNode(N);
1494 } else {
1495 AddToWorklist(N);
1496 }
1497 } while (!Nodes.empty());
1498 return true;
1499}
1500
1501//===----------------------------------------------------------------------===//
1502// Main DAG Combiner implementation
1503//===----------------------------------------------------------------------===//
1504
1505void DAGCombiner::Run(CombineLevel AtLevel) {
1506 // Set the instance variables so that the various visit routines may use them.
1507 Level = AtLevel;
1508 LegalDAG = Level >= AfterLegalizeDAG;
1509 LegalOperations = Level >= AfterLegalizeVectorOps;
1510 LegalTypes = Level >= AfterLegalizeTypes;
1511
1512 WorklistInserter AddNodes(*this);
1513
1514 // Add all the dag nodes to the worklist.
1515 for (SDNode &Node : DAG.allnodes())
1516 AddToWorklist(&Node);
1517
1518 // Create a dummy node (which is not added to allnodes), that adds a reference
1519 // to the root node, preventing it from being deleted, and tracking any
1520 // changes of the root.
1521 HandleSDNode Dummy(DAG.getRoot());
1522
1523 // While we have a valid worklist entry node, try to combine it.
1524 while (SDNode *N = getNextWorklistEntry()) {
1525 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1526 // N is deleted from the DAG, since they too may now be dead or may have a
1527 // reduced number of uses, allowing other xforms.
1528 if (recursivelyDeleteUnusedNodes(N))
1529 continue;
1530
1531 WorklistRemover DeadNodes(*this);
1532
1533 // If this combine is running after legalizing the DAG, re-legalize any
1534 // nodes pulled off the worklist.
1535 if (LegalDAG) {
1536 SmallSetVector<SDNode *, 16> UpdatedNodes;
1537 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538
1539 for (SDNode *LN : UpdatedNodes)
1540 AddToWorklistWithUsers(LN);
1541
1542 if (!NIsValid)
1543 continue;
1544 }
1545
1546 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547
1548 // Add any operands of the new node which have not yet been combined to the
1549 // worklist as well. Because the worklist uniques things already, this
1550 // won't repeatedly process the same operand.
1551 CombinedNodes.insert(N);
1552 for (const SDValue &ChildN : N->op_values())
1553 if (!CombinedNodes.count(ChildN.getNode()))
1554 AddToWorklist(ChildN.getNode());
1555
1556 SDValue RV = combine(N);
1557
1558 if (!RV.getNode())
1559 continue;
1560
1561 ++NodesCombined;
1562
1563 // If we get back the same node we passed in, rather than a new node or
1564 // zero, we know that the node must have defined multiple values and
1565 // CombineTo was used. Since CombineTo takes care of the worklist
1566 // mechanics for us, we have no work to do in this case.
1567 if (RV.getNode() == N)
1568 continue;
1569
1570 assert(N->getOpcode() != ISD::DELETED_NODE &&
1571 RV.getOpcode() != ISD::DELETED_NODE &&
1572 "Node was deleted but visit returned new node!");
1573
1574 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575
1576 if (N->getNumValues() == RV.getNode()->getNumValues())
1577 DAG.ReplaceAllUsesWith(N, RV.getNode());
1578 else {
1579 assert(N->getValueType(0) == RV.getValueType() &&
1580 N->getNumValues() == 1 && "Type mismatch");
1581 DAG.ReplaceAllUsesWith(N, &RV);
1582 }
1583
1584 // Push the new node and any users onto the worklist. Omit this if the
1585 // new node is the EntryToken (e.g. if a store managed to get optimized
1586 // out), because re-visiting the EntryToken and its users will not uncover
1587 // any additional opportunities, but there may be a large number of such
1588 // users, potentially causing compile time explosion.
1589 if (RV.getOpcode() != ISD::EntryToken) {
1590 AddToWorklist(RV.getNode());
1591 AddUsersToWorklist(RV.getNode());
1592 }
1593
1594 // Finally, if the node is now dead, remove it from the graph. The node
1595 // may not be dead if the replacement process recursively simplified to
1596 // something else needing this node. This will also take care of adding any
1597 // operands which have lost a user to the worklist.
1598 recursivelyDeleteUnusedNodes(N);
1599 }
1600
1601 // If the root changed (e.g. it was a dead load), update the root.
1602 DAG.setRoot(Dummy.getValue());
1603 DAG.RemoveDeadNodes();
1604}
1605
1606SDValue DAGCombiner::visit(SDNode *N) {
1607 switch (N->getOpcode()) {
1608 default: break;
1609 case ISD::TokenFactor: return visitTokenFactor(N);
1610 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1611 case ISD::ADD: return visitADD(N);
1612 case ISD::SUB: return visitSUB(N);
1613 case ISD::SADDSAT:
1614 case ISD::UADDSAT: return visitADDSAT(N);
1615 case ISD::SSUBSAT:
1616 case ISD::USUBSAT: return visitSUBSAT(N);
1617 case ISD::ADDC: return visitADDC(N);
1618 case ISD::SADDO:
1619 case ISD::UADDO: return visitADDO(N);
1620 case ISD::SUBC: return visitSUBC(N);
1621 case ISD::SSUBO:
1622 case ISD::USUBO: return visitSUBO(N);
1623 case ISD::ADDE: return visitADDE(N);
1624 case ISD::ADDCARRY: return visitADDCARRY(N);
1625 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1626 case ISD::SUBE: return visitSUBE(N);
1627 case ISD::SUBCARRY: return visitSUBCARRY(N);
1628 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1629 case ISD::SMULFIX:
1630 case ISD::SMULFIXSAT:
1631 case ISD::UMULFIX:
1632 case ISD::UMULFIXSAT: return visitMULFIX(N);
1633 case ISD::MUL: return visitMUL(N);
1634 case ISD::SDIV: return visitSDIV(N);
1635 case ISD::UDIV: return visitUDIV(N);
1636 case ISD::SREM:
1637 case ISD::UREM: return visitREM(N);
1638 case ISD::MULHU: return visitMULHU(N);
1639 case ISD::MULHS: return visitMULHS(N);
1640 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1641 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1642 case ISD::SMULO:
1643 case ISD::UMULO: return visitMULO(N);
1644 case ISD::SMIN:
1645 case ISD::SMAX:
1646 case ISD::UMIN:
1647 case ISD::UMAX: return visitIMINMAX(N);
1648 case ISD::AND: return visitAND(N);
1649 case ISD::OR: return visitOR(N);
1650 case ISD::XOR: return visitXOR(N);
1651 case ISD::SHL: return visitSHL(N);
1652 case ISD::SRA: return visitSRA(N);
1653 case ISD::SRL: return visitSRL(N);
1654 case ISD::ROTR:
1655 case ISD::ROTL: return visitRotate(N);
1656 case ISD::FSHL:
1657 case ISD::FSHR: return visitFunnelShift(N);
1658 case ISD::ABS: return visitABS(N);
1659 case ISD::BSWAP: return visitBSWAP(N);
1660 case ISD::BITREVERSE: return visitBITREVERSE(N);
1661 case ISD::CTLZ: return visitCTLZ(N);
1662 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1663 case ISD::CTTZ: return visitCTTZ(N);
1664 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1665 case ISD::CTPOP: return visitCTPOP(N);
1666 case ISD::SELECT: return visitSELECT(N);
1667 case ISD::VSELECT: return visitVSELECT(N);
1668 case ISD::SELECT_CC: return visitSELECT_CC(N);
1669 case ISD::SETCC: return visitSETCC(N);
1670 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674 case ISD::AssertSext:
1675 case ISD::AssertZext: return visitAssertExt(N);
1676 case ISD::AssertAlign: return visitAssertAlign(N);
1677 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1678 case ISD::SIGN_EXTEND_VECTOR_INREG:
1679 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680 case ISD::TRUNCATE: return visitTRUNCATE(N);
1681 case ISD::BITCAST: return visitBITCAST(N);
1682 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683 case ISD::FADD: return visitFADD(N);
1684 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685 case ISD::FSUB: return visitFSUB(N);
1686 case ISD::FMUL: return visitFMUL(N);
1687 case ISD::FMA: return visitFMA(N);
1688 case ISD::FDIV: return visitFDIV(N);
1689 case ISD::FREM: return visitFREM(N);
1690 case ISD::FSQRT: return visitFSQRT(N);
1691 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692 case ISD::FPOW: return visitFPOW(N);
1693 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697 case ISD::FP_ROUND: return visitFP_ROUND(N);
1698 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699 case ISD::FNEG: return visitFNEG(N);
1700 case ISD::FABS: return visitFABS(N);
1701 case ISD::FFLOOR: return visitFFLOOR(N);
1702 case ISD::FMINNUM: return visitFMINNUM(N);
1703 case ISD::FMAXNUM: return visitFMAXNUM(N);
1704 case ISD::FMINIMUM: return visitFMINIMUM(N);
1705 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1706 case ISD::FCEIL: return visitFCEIL(N);
1707 case ISD::FTRUNC: return visitFTRUNC(N);
1708 case ISD::BRCOND: return visitBRCOND(N);
1709 case ISD::BR_CC: return visitBR_CC(N);
1710 case ISD::LOAD: return visitLOAD(N);
1711 case ISD::STORE: return visitSTORE(N);
1712 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1713 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1715 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1716 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1717 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1718 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1719 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1720 case ISD::MGATHER: return visitMGATHER(N);
1721 case ISD::MLOAD: return visitMLOAD(N);
1722 case ISD::MSCATTER: return visitMSCATTER(N);
1723 case ISD::MSTORE: return visitMSTORE(N);
1724 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1725 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727 case ISD::FREEZE: return visitFREEZE(N);
1728 case ISD::VECREDUCE_FADD:
1729 case ISD::VECREDUCE_FMUL:
1730 case ISD::VECREDUCE_ADD:
1731 case ISD::VECREDUCE_MUL:
1732 case ISD::VECREDUCE_AND:
1733 case ISD::VECREDUCE_OR:
1734 case ISD::VECREDUCE_XOR:
1735 case ISD::VECREDUCE_SMAX:
1736 case ISD::VECREDUCE_SMIN:
1737 case ISD::VECREDUCE_UMAX:
1738 case ISD::VECREDUCE_UMIN:
1739 case ISD::VECREDUCE_FMAX:
1740 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741 }
1742 return SDValue();
1743}
1744
1745SDValue DAGCombiner::combine(SDNode *N) {
1746 SDValue RV;
1747 if (!DisableGenericCombines)
1748 RV = visit(N);
1749
1750 // If nothing happened, try a target-specific DAG combine.
1751 if (!RV.getNode()) {
1752 assert(N->getOpcode() != ISD::DELETED_NODE &&
1753 "Node was deleted but visit returned NULL!");
1754
1755 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757
1758 // Expose the DAG combiner to the target combiner impls.
1759 TargetLowering::DAGCombinerInfo
1760 DagCombineInfo(DAG, Level, false, this);
1761
1762 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763 }
1764 }
1765
1766 // If nothing happened still, try promoting the operation.
1767 if (!RV.getNode()) {
1768 switch (N->getOpcode()) {
1769 default: break;
1770 case ISD::ADD:
1771 case ISD::SUB:
1772 case ISD::MUL:
1773 case ISD::AND:
1774 case ISD::OR:
1775 case ISD::XOR:
1776 RV = PromoteIntBinOp(SDValue(N, 0));
1777 break;
1778 case ISD::SHL:
1779 case ISD::SRA:
1780 case ISD::SRL:
1781 RV = PromoteIntShiftOp(SDValue(N, 0));
1782 break;
1783 case ISD::SIGN_EXTEND:
1784 case ISD::ZERO_EXTEND:
1785 case ISD::ANY_EXTEND:
1786 RV = PromoteExtend(SDValue(N, 0));
1787 break;
1788 case ISD::LOAD:
1789 if (PromoteLoad(SDValue(N, 0)))
1790 RV = SDValue(N, 0);
1791 break;
1792 }
1793 }
1794
1795 // If N is a commutative binary node, try to eliminate it if the commuted
1796 // version is already present in the DAG.
1797 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798 N->getNumValues() == 1) {
1799 SDValue N0 = N->getOperand(0);
1800 SDValue N1 = N->getOperand(1);
1801
1802 // Constant operands are canonicalized to RHS.
1803 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804 SDValue Ops[] = {N1, N0};
1805 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806 N->getFlags());
1807 if (CSENode)
1808 return SDValue(CSENode, 0);
1809 }
1810 }
1811
1812 return RV;
1813}
1814
1815/// Given a node, return its input chain if it has one, otherwise return a null
1816/// sd operand.
1817static SDValue getInputChainForNode(SDNode *N) {
1818 if (unsigned NumOps = N->getNumOperands()) {
1819 if (N->getOperand(0).getValueType() == MVT::Other)
1820 return N->getOperand(0);
1821 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822 return N->getOperand(NumOps-1);
1823 for (unsigned i = 1; i < NumOps-1; ++i)
1824 if (N->getOperand(i).getValueType() == MVT::Other)
1825 return N->getOperand(i);
1826 }
1827 return SDValue();
1828}
1829
1830SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1831 // If N has two operands, where one has an input chain equal to the other,
1832 // the 'other' chain is redundant.
1833 if (N->getNumOperands() == 2) {
1834 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1835 return N->getOperand(0);
1836 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1837 return N->getOperand(1);
1838 }
1839
1840 // Don't simplify token factors if optnone.
1841 if (OptLevel == CodeGenOpt::None)
1842 return SDValue();
1843
1844 // Don't simplify the token factor if the node itself has too many operands.
1845 if (N->getNumOperands() > TokenFactorInlineLimit)
1846 return SDValue();
1847
1848 // If the sole user is a token factor, we should make sure we have a
1849 // chance to merge them together. This prevents TF chains from inhibiting
1850 // optimizations.
1851 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1852 AddToWorklist(*(N->use_begin()));
1853
1854 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1855 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1856 SmallPtrSet<SDNode*, 16> SeenOps;
1857 bool Changed = false; // If we should replace this token factor.
1858
1859 // Start out with this token factor.
1860 TFs.push_back(N);
1861
1862 // Iterate through token factors. The TFs list grows when new token factors
1863 // are encountered.
1864 for (unsigned i = 0; i < TFs.size(); ++i) {
1865 // Limit number of nodes to inline, to avoid quadratic compile times.
1866 // We have to add the outstanding Token Factors to Ops, otherwise we might
1867 // drop Ops from the resulting Token Factors.
1868 if (Ops.size() > TokenFactorInlineLimit) {
1869 for (unsigned j = i; j < TFs.size(); j++)
1870 Ops.emplace_back(TFs[j], 0);
1871 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1872 // combiner worklist later.
1873 TFs.resize(i);
1874 break;
1875 }
1876
1877 SDNode *TF = TFs[i];
1878 // Check each of the operands.
1879 for (const SDValue &Op : TF->op_values()) {
1880 switch (Op.getOpcode()) {
1881 case ISD::EntryToken:
1882 // Entry tokens don't need to be added to the list. They are
1883 // redundant.
1884 Changed = true;
1885 break;
1886
1887 case ISD::TokenFactor:
1888 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1889 // Queue up for processing.
1890 TFs.push_back(Op.getNode());
1891 Changed = true;
1892 break;
1893 }
1894 LLVM_FALLTHROUGH;
1895
1896 default:
1897 // Only add if it isn't already in the list.
1898 if (SeenOps.insert(Op.getNode()).second)
1899 Ops.push_back(Op);
1900 else
1901 Changed = true;
1902 break;
1903 }
1904 }
1905 }
1906
1907 // Re-visit inlined Token Factors, to clean them up in case they have been
1908 // removed. Skip the first Token Factor, as this is the current node.
1909 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1910 AddToWorklist(TFs[i]);
1911
1912 // Remove Nodes that are chained to another node in the list. Do so
1913 // by walking up chains breadth-first, stopping when we've seen
1914 // another operand. In general we must climb to the EntryNode, but we can exit
1915 // early if we find all remaining work is associated with just one operand as
1916 // no further pruning is possible.
1917
1918 // List of nodes to search through and original Ops from which they originate.
1919 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1920 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1921 SmallPtrSet<SDNode *, 16> SeenChains;
1922 bool DidPruneOps = false;
1923
1924 unsigned NumLeftToConsider = 0;
1925 for (const SDValue &Op : Ops) {
1926 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1927 OpWorkCount.push_back(1);
1928 }
1929
1930 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1931 // If this is an Op, we can remove the op from the list. Re-mark any
1932 // search associated with it as from the current OpNumber.
1933 if (SeenOps.contains(Op)) {
1934 Changed = true;
1935 DidPruneOps = true;
1936 unsigned OrigOpNumber = 0;
1937 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1938 OrigOpNumber++;
1939 assert((OrigOpNumber != Ops.size()) &&
1940 "expected to find TokenFactor Operand");
1941 // Re-mark worklist from OrigOpNumber to OpNumber
1942 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1943 if (Worklist[i].second == OrigOpNumber) {
1944 Worklist[i].second = OpNumber;
1945 }
1946 }
1947 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1948 OpWorkCount[OrigOpNumber] = 0;
1949 NumLeftToConsider--;
1950 }
1951 // Add if it's a new chain
1952 if (SeenChains.insert(Op).second) {
1953 OpWorkCount[OpNumber]++;
1954 Worklist.push_back(std::make_pair(Op, OpNumber));
1955 }
1956 };
1957
1958 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1959 // We need to consider at least 2 Ops to prune.
1960 if (NumLeftToConsider <= 1)
1961 break;
1962 auto CurNode = Worklist[i].first;
1963 auto CurOpNumber = Worklist[i].second;
1964 assert((OpWorkCount[CurOpNumber] > 0) &&
1965 "Node should not appear in worklist");
1966 switch (CurNode->getOpcode()) {
1967 case ISD::EntryToken:
1968 // Hitting EntryToken is the only way for the search to terminate without
1969 // hitting
1970 // another operand's search. Prevent us from marking this operand
1971 // considered.
1972 NumLeftToConsider++;
1973 break;
1974 case ISD::TokenFactor:
1975 for (const SDValue &Op : CurNode->op_values())
1976 AddToWorklist(i, Op.getNode(), CurOpNumber);
1977 break;
1978 case ISD::LIFETIME_START:
1979 case ISD::LIFETIME_END:
1980 case ISD::CopyFromReg:
1981 case ISD::CopyToReg:
1982 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1983 break;
1984 default:
1985 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1986 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1987 break;
1988 }
1989 OpWorkCount[CurOpNumber]--;
1990 if (OpWorkCount[CurOpNumber] == 0)
1991 NumLeftToConsider--;
1992 }
1993
1994 // If we've changed things around then replace token factor.
1995 if (Changed) {
1996 SDValue Result;
1997 if (Ops.empty()) {
1998 // The entry token is the only possible outcome.
1999 Result = DAG.getEntryNode();
2000 } else {
2001 if (DidPruneOps) {
2002 SmallVector<SDValue, 8> PrunedOps;
2003 //
2004 for (const SDValue &Op : Ops) {
2005 if (SeenChains.count(Op.getNode()) == 0)
2006 PrunedOps.push_back(Op);
2007 }
2008 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2009 } else {
2010 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2011 }
2012 }
2013 return Result;
2014 }
2015 return SDValue();
2016}
2017
2018/// MERGE_VALUES can always be eliminated.
2019SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020 WorklistRemover DeadNodes(*this);
2021 // Replacing results may cause a different MERGE_VALUES to suddenly
2022 // be CSE'd with N, and carry its uses with it. Iterate until no
2023 // uses remain, to ensure that the node can be safely deleted.
2024 // First add the users of this node to the work list so that they
2025 // can be tried again once they have new operands.
2026 AddUsersToWorklist(N);
2027 do {
2028 // Do as a single replacement to avoid rewalking use lists.
2029 SmallVector<SDValue, 8> Ops;
2030 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031 Ops.push_back(N->getOperand(i));
2032 DAG.ReplaceAllUsesWith(N, Ops.data());
2033 } while (!N->use_empty());
2034 deleteAndRecombine(N);
2035 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2036}
2037
2038 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2039 /// ConstantSDNode pointer; otherwise return nullptr.
2040static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043}
2044
2045/// Return true if 'Use' is a load or a store that uses N as its base pointer
2046/// and that N may be folded in the load / store addressing mode.
2047static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048 const TargetLowering &TLI) {
2049 EVT VT;
2050 unsigned AS;
2051
2052 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054 return false;
2055 VT = LD->getMemoryVT();
2056 AS = LD->getAddressSpace();
2057 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059 return false;
2060 VT = ST->getMemoryVT();
2061 AS = ST->getAddressSpace();
2062 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064 return false;
2065 VT = LD->getMemoryVT();
2066 AS = LD->getAddressSpace();
2067 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069 return false;
2070 VT = ST->getMemoryVT();
2071 AS = ST->getAddressSpace();
2072 } else
2073 return false;
2074
2075 TargetLowering::AddrMode AM;
2076 if (N->getOpcode() == ISD::ADD) {
2077 AM.HasBaseReg = true;
2078 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079 if (Offset)
2080 // [reg +/- imm]
2081 AM.BaseOffs = Offset->getSExtValue();
2082 else
2083 // [reg +/- reg]
2084 AM.Scale = 1;
2085 } else if (N->getOpcode() == ISD::SUB) {
2086 AM.HasBaseReg = true;
2087 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088 if (Offset)
2089 // [reg +/- imm]
2090 AM.BaseOffs = -Offset->getSExtValue();
2091 else
2092 // [reg +/- reg]
2093 AM.Scale = 1;
2094 } else
2095 return false;
2096
2097 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098 VT.getTypeForEVT(*DAG.getContext()), AS);
2099}
2100
2101SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103 "Unexpected binary operator");
2104
2105 // Don't do this unless the old select is going away. We want to eliminate the
2106 // binary operator, not replace a binop with a select.
2107 // TODO: Handle ISD::SELECT_CC.
2108 unsigned SelOpNo = 0;
2109 SDValue Sel = BO->getOperand(0);
2110 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111 SelOpNo = 1;
2112 Sel = BO->getOperand(1);
2113 }
2114
2115 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116 return SDValue();
2117
2118 SDValue CT = Sel.getOperand(1);
2119 if (!isConstantOrConstantVector(CT, true) &&
2120 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121 return SDValue();
2122
2123 SDValue CF = Sel.getOperand(2);
2124 if (!isConstantOrConstantVector(CF, true) &&
2125 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126 return SDValue();
2127
2128 // Bail out if any constants are opaque because we can't constant fold those.
2129 // The exception is "and" and "or" with either 0 or -1 in which case we can
2130 // propagate non constant operands into select. I.e.:
2131 // and (select Cond, 0, -1), X --> select Cond, 0, X
2132 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133 auto BinOpcode = BO->getOpcode();
2134 bool CanFoldNonConst =
2135 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138
2139 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140 if (!CanFoldNonConst &&
2141 !isConstantOrConstantVector(CBO, true) &&
2142 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143 return SDValue();
2144
2145 EVT VT = BO->getValueType(0);
2146
2147 // We have a select-of-constants followed by a binary operator with a
2148 // constant. Eliminate the binop by pulling the constant math into the select.
2149 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150 SDLoc DL(Sel);
2151 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153 if (!CanFoldNonConst && !NewCT.isUndef() &&
2154 !isConstantOrConstantVector(NewCT, true) &&
2155 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156 return SDValue();
2157
2158 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160 if (!CanFoldNonConst && !NewCF.isUndef() &&
2161 !isConstantOrConstantVector(NewCF, true) &&
2162 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163 return SDValue();
2164
2165 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166 SelectOp->setFlags(BO->getFlags());
2167 return SelectOp;
2168}
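
A standalone check (plain C++, independent of LLVM; Cond and x are arbitrary) of the non-constant special case noted above: 'and' with a select of 0/-1, or 'or' with a select of -1/0, lets the non-constant operand be pulled directly into the select.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xCAFEBABEu;
  for (bool cond : {false, true}) {
    uint32_t sel01 = cond ? 0u : ~0u;          // select Cond, 0, -1
    assert((sel01 & x) == (cond ? 0u : x));    // and (select Cond, 0, -1), x --> select Cond, 0, x
    uint32_t sel10 = cond ? ~0u : 0u;          // select Cond, -1, 0
    assert((x | sel10) == (cond ? ~0u : x));   // or x, (select Cond, -1, 0) --> select Cond, -1, x
  }
  return 0;
}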
2169
2170static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172 "Expecting add or sub");
2173
2174 // Match a constant operand and a zext operand for the math instruction:
2175 // add Z, C
2176 // sub C, Z
2177 bool IsAdd = N->getOpcode() == ISD::ADD;
2178 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180 auto *CN = dyn_cast<ConstantSDNode>(C);
2181 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182 return SDValue();
2183
2184 // Match the zext operand as a setcc of a boolean.
2185 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186 Z.getOperand(0).getValueType() != MVT::i1)
2187 return SDValue();
2188
2189 // Match the compare as: setcc (X & 1), 0, eq.
2190 SDValue SetCC = Z.getOperand(0);
2191 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195 return SDValue();
2196
2197 // We are adding/subtracting a constant and an inverted low bit. Turn that
2198 // into a subtract/add of the low bit with incremented/decremented constant:
2199 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201 EVT VT = C.getValueType();
2202 SDLoc DL(N);
2203 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207}
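
A standalone check (plain C++, independent of LLVM; C and the x values are arbitrary) of the identity behind this fold: adding the zero-extended "low bit is clear" predicate equals subtracting the low bit from C+1, and subtracting it from C equals adding the low bit to C-1.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t c = 1000;
  for (uint32_t x : {0u, 1u, 2u, 3u, 0xFFFFFFFFu}) {
    uint32_t z   = (x & 1) == 0 ? 1u : 0u;  // zext i1 (seteq (x & 1), 0)
    uint32_t low = x & 1;                   // zext (x & 1)
    assert(z + c == (c + 1) - low);         // add (zext i1 ...), C --> sub C+1, (zext (x & 1))
    assert(c - z == (c - 1) + low);         // sub C, (zext i1 ...) --> add C-1, (zext (x & 1))
  }
  return 0;
}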
2208
2209/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2210/// a shift and add with a different constant.
2211static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213 "Expecting add or sub");
2214
2215 // We need a constant operand for the add/sub, and the other operand is a
2216 // logical shift right: add (srl), C or sub C, (srl).
2217 bool IsAdd = N->getOpcode() == ISD::ADD;
2218 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221 ShiftOp.getOpcode() != ISD::SRL)
2222 return SDValue();
2223
2224 // The shift must be of a 'not' value.
2225 SDValue Not = ShiftOp.getOperand(0);
2226 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227 return SDValue();
2228
2229 // The shift must be moving the sign bit to the least-significant-bit.
2230 EVT VT = ShiftOp.getValueType();
2231 SDValue ShAmt = ShiftOp.getOperand(1);
2232 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234 return SDValue();
2235
2236 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239 SDLoc DL(N);
2240 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242 if (SDValue NewC =
2243 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246 return SDValue();
2247}
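
A standalone check (plain C++, independent of LLVM; C and the x values are arbitrary, and '>>' on int32_t is assumed to be an arithmetic shift, as on mainstream compilers) of the two rewrites above:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t c = 42;
  for (uint32_t x : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t srlNot = ~x >> 31;                   // srl (not x), 31
    uint32_t sra    = uint32_t(int32_t(x) >> 31); // sra x, 31 (0 or -1)
    uint32_t srl    = x >> 31;                    // srl x, 31 (0 or 1)
    assert(srlNot + c == sra + (c + 1));          // add (srl (not x), 31), C --> add (sra x, 31), C+1
    assert(c - srlNot == srl + (c - 1));          // sub C, (srl (not x), 31) --> add (srl x, 31), C-1
  }
  return 0;
}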
2248
2249/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251/// are no common bits set in the operands).
2252SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253 SDValue N0 = N->getOperand(0);
2254 SDValue N1 = N->getOperand(1);
2255 EVT VT = N0.getValueType();
2256 SDLoc DL(N);
2257
2258 // fold vector ops
2259 if (VT.isVector()) {
2260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261 return FoldedVOp;
2262
2263 // fold (add x, 0) -> x, vector edition
2264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265 return N0;
2266 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267 return N1;
2268 }
2269
2270 // fold (add x, undef) -> undef
2271 if (N0.isUndef())
2272 return N0;
2273
2274 if (N1.isUndef())
2275 return N1;
2276
2277 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278 // canonicalize constant to RHS
2279 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281 // fold (add c1, c2) -> c1+c2
2282 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283 }
2284
2285 // fold (add x, 0) -> x
2286 if (isNullConstant(N1))
2287 return N0;
2288
2289 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290 // fold ((A-c1)+c2) -> (A+(c2-c1))
2291 if (N0.getOpcode() == ISD::SUB &&
2292 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293 SDValue Sub =
2294 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295 assert(Sub && "Constant folding failed");
2296 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297 }
2298
2299 // fold ((c1-A)+c2) -> (c1+c2)-A
2300 if (N0.getOpcode() == ISD::SUB &&
2301 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302 SDValue Add =
2303 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304 assert(Add && "Constant folding failed");
2305 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306 }
2307
2308 // add (sext i1 X), 1 -> zext (not i1 X)
2309 // We don't transform this pattern:
2310 // add (zext i1 X), -1 -> sext (not i1 X)
2311 // because most (?) targets generate better code for the zext form.
2312 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313 isOneOrOneSplat(N1)) {
2314 SDValue X = N0.getOperand(0);
2315 if ((!LegalOperations ||
2316 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318 X.getScalarValueSizeInBits() == 1) {
2319 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321 }
2322 }
2323
2324 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325 // equivalent to (add x, c0).
2326 if (N0.getOpcode() == ISD::OR &&
2327 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330 {N1, N0.getOperand(1)}))
2331 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332 }
2333 }
2334
2335 if (SDValue NewSel = foldBinOpIntoSelect(N))
2336 return NewSel;
2337
2338 // reassociate add
2339 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341 return RADD;
2342
2343 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344 // equivalent to (add x, c).
2345 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346 if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349 return DAG.getNode(ISD::ADD, DL, VT,
2350 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351 N0.getOperand(1));
2352 }
2353 return SDValue();
2354 };
2355 if (SDValue Add = ReassociateAddOr(N0, N1))
2356 return Add;
2357 if (SDValue Add = ReassociateAddOr(N1, N0))
2358 return Add;
2359 }
2360 // fold ((0-A) + B) -> B-A
2361 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363
2364 // fold (A + (0-B)) -> A-B
2365 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367
2368 // fold (A+(B-A)) -> B
2369 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370 return N1.getOperand(0);
2371
2372 // fold ((B-A)+A) -> B
2373 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374 return N0.getOperand(0);
2375
2376 // fold ((A-B)+(C-A)) -> (C-B)
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378 N0.getOperand(0) == N1.getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N0.getOperand(1));
2381
2382 // fold ((A-B)+(B-C)) -> (A-C)
2383 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384 N0.getOperand(1) == N1.getOperand(0))
2385 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386 N1.getOperand(1));
2387
2388 // fold (A+(B-(A+C))) to (B-C)
2389 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390 N0 == N1.getOperand(1).getOperand(0))
2391 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392 N1.getOperand(1).getOperand(1));
2393
2394 // fold (A+(B-(C+A))) to (B-C)
2395 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396 N0 == N1.getOperand(1).getOperand(1))
2397 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398 N1.getOperand(1).getOperand(0));
2399
2400 // fold (A+((B-A)+or-C)) to (B+or-C)
2401 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402 N1.getOperand(0).getOpcode() == ISD::SUB &&
2403 N0 == N1.getOperand(0).getOperand(1))
2404 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405 N1.getOperand(1));
2406
2407 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409 SDValue N00 = N0.getOperand(0);
2410 SDValue N01 = N0.getOperand(1);
2411 SDValue N10 = N1.getOperand(0);
2412 SDValue N11 = N1.getOperand(1);
2413
2414 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415 return DAG.getNode(ISD::SUB, DL, VT,
2416 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418 }
2419
2420 // fold (add (umax X, C), -C) --> (usubsat X, C)
2421 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423 return (!Max && !Op) ||
2424 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425 };
2426 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427 /*AllowUndefs*/ true))
2428 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429 N0.getOperand(1));
2430 }
2431
2432 if (SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
2434
2435 if (isOneOrOneSplat(N1)) {
2436 // fold (add (xor a, -1), 1) -> (sub 0, a)
2437 if (isBitwiseNot(N0))
2438 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439 N0.getOperand(0));
2440
2441 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442 if (N0.getOpcode() == ISD::ADD) {
2443 SDValue A, Xor;
2444
2445 if (isBitwiseNot(N0.getOperand(0))) {
2446 A = N0.getOperand(1);
2447 Xor = N0.getOperand(0);
2448 } else if (isBitwiseNot(N0.getOperand(1))) {
2449 A = N0.getOperand(0);
2450 Xor = N0.getOperand(1);
2451 }
2452
2453 if (Xor)
2454 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455 }
2456
2457 // Look for:
2458 // add (add x, y), 1
2459 // And if the target does not like this form then turn into:
2460 // sub y, (xor x, -1)
2461 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462 N0.getOpcode() == ISD::ADD) {
2463 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464 DAG.getAllOnesConstant(DL, VT));
2465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466 }
2467 }
2468
2469 // (x - y) + -1 -> add (xor y, -1), x
2470 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471 isAllOnesOrAllOnesSplat(N1)) {
2472 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474 }
2475
2476 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477 return Combined;
2478
2479 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480 return Combined;
2481
2482 return SDValue();
2483}
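
Two of the folds in visitADDLike above have simple two's-complement justifications: (xor a, -1) + 1 is exactly 0 - a, and (x - y) + (-1) equals (xor y, -1) + x. A standalone check (plain C++, independent of LLVM; the test values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a : {0u, 1u, 0x80000000u, 0xFFFFFFFFu}) {
    assert((a ^ 0xFFFFFFFFu) + 1 == 0u - a);                  // add (xor a, -1), 1 --> sub 0, a
    for (uint32_t y : {0u, 7u, 0xFFFFFFFFu})
      assert((a - y) + 0xFFFFFFFFu == (y ^ 0xFFFFFFFFu) + a); // (x - y) + -1 --> add (xor y, -1), x
  }
  return 0;
}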
2484
2485SDValue DAGCombiner::visitADD(SDNode *N) {
2486 SDValue N0 = N->getOperand(0);
2487 SDValue N1 = N->getOperand(1);
2488 EVT VT = N0.getValueType();
2489 SDLoc DL(N);
2490
2491 if (SDValue Combined = visitADDLike(N))
2492 return Combined;
2493
2494 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495 return V;
2496
2497 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498 return V;
2499
2500 // fold (a+b) -> (a|b) iff a and b share no bits.
2501 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502 DAG.haveNoCommonBitsSet(N0, N1))
2503 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504
2505 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507 const APInt &C0 = N0->getConstantOperandAPInt(0);
2508 const APInt &C1 = N1->getConstantOperandAPInt(0);
2509 return DAG.getVScale(DL, VT, C0 + C1);
2510 }
2511
2512 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513 if ((N0.getOpcode() == ISD::ADD) &&
2514 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515 (N1.getOpcode() == ISD::VSCALE)) {
2516 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520 }
2521
2522 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524 N1.getOpcode() == ISD::STEP_VECTOR) {
2525 const APInt &C0 = N0->getConstantOperandAPInt(0);
2526 const APInt &C1 = N1->getConstantOperandAPInt(0);
2527 APInt NewStep = C0 + C1;
2528 return DAG.getStepVector(DL, VT, NewStep);
2529 }
2530
2531 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532 if ((N0.getOpcode() == ISD::ADD) &&
2533 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537 APInt NewStep = SV0 + SV1;
2538 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540 }
2541
2542 return SDValue();
2543}
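
A standalone check (plain C++, independent of LLVM; the operands are arbitrary values chosen to share no set bits) of the add-to-or fold above: when no bit position can carry, addition and bitwise OR coincide.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xF0F00000u, b = 0x0000ABCDu; // no common bits set
  assert((a & b) == 0);
  assert(a + b == (a | b));                  // (a + b) --> (a | b) iff a and b share no bits
  return 0;
}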
2544
2545SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546 unsigned Opcode = N->getOpcode();
2547 SDValue N0 = N->getOperand(0);
2548 SDValue N1 = N->getOperand(1);
2549 EVT VT = N0.getValueType();
2550 SDLoc DL(N);
2551
2552 // fold vector ops
2553 if (VT.isVector()) {
2554 // TODO SimplifyVBinOp
2555
2556 // fold (add_sat x, 0) -> x, vector edition
2557 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2558 return N0;
2559 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2560 return N1;
2561 }
2562
2563 // fold (add_sat x, undef) -> -1
2564 if (N0.isUndef() || N1.isUndef())
2565 return DAG.getAllOnesConstant(DL, VT);
2566
2567 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2568 // canonicalize constant to RHS
2569 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2570 return DAG.getNode(Opcode, DL, VT, N1, N0);
2571 // fold (add_sat c1, c2) -> c3
2572 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2573 }
2574
2575 // fold (add_sat x, 0) -> x
2576 if (isNullConstant(N1))
2577 return N0;
2578
2579 // If it cannot overflow, transform into an add.
2580 if (Opcode == ISD::UADDSAT)
2581 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2582 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583
2584 return SDValue();
2585}
2586
2587static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2588 bool Masked = false;
2589
2590 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2591 while (true) {
2592 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2593 V = V.getOperand(0);
2594 continue;
2595 }
2596
2597 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2598 Masked = true;
2599 V = V.getOperand(0);
2600 continue;
2601 }
2602
2603 break;
2604 }
2605
2606 // If this is not a carry, return.
2607 if (V.getResNo() != 1)
2608 return SDValue();
2609
2610 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2611 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2612 return SDValue();
2613
2614 EVT VT = V.getNode()->getValueType(0);
2615 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2616 return SDValue();
2617
2618 // If the result is masked, then no matter what kind of bool it is we can
2619 // return. If it isn't, then we need to make sure the bool type is either 0 or
2620 // 1 and not other values.
2621 if (Masked ||
2622 TLI.getBooleanContents(V.getValueType()) ==
2623 TargetLoweringBase::ZeroOrOneBooleanContent)
2624 return V;
2625
2626 return SDValue();
2627}
2628
2629/// Given the operands of an add/sub operation, see if the 2nd operand is a
2630/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2631/// the opcode and bypass the mask operation.
2632static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2633 SelectionDAG &DAG, const SDLoc &DL) {
2634 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2635 return SDValue();
2636
2637 EVT VT = N0.getValueType();
2638 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2639 return SDValue();
2640
2641 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2642 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2643 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2644}
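
A standalone check (plain C++, independent of LLVM; x is arbitrary) of the bypass described above: when the masked value is known to be 0 or -1, adding its low bit is the same as subtracting the unmasked value, and vice versa for sub.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 123456;
  for (uint32_t y : {0u, 0xFFFFFFFFu}) { // y plays the role of the known 0/-1 source operand
    assert(x + (y & 1) == x - y);        // add N0, (and y, 1) --> sub N0, y
    assert(x - (y & 1) == x + y);        // sub N0, (and y, 1) --> add N0, y
  }
  return 0;
}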
2645
2646/// Helper for doing combines based on N0 and N1 being added to each other.
2647SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2648 SDNode *LocReference) {
2649 EVT VT = N0.getValueType();
2650 SDLoc DL(LocReference);
2651
2652 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2653 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2655 return DAG.getNode(ISD::SUB, DL, VT, N0,
2656 DAG.getNode(ISD::SHL, DL, VT,
2657 N1.getOperand(0).getOperand(1),
2658 N1.getOperand(1)));
2659
2660 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2661 return V;
2662
2663 // Look for:
2664 // add (add x, 1), y
2665 // And if the target does not like this form then turn into:
2666 // sub y, (xor x, -1)
2667 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2668 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2669 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2670 DAG.getAllOnesConstant(DL, VT));
2671 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2672 }
2673
2674 // Hoist one-use subtraction by non-opaque constant:
2675 // (x - C) + y -> (x + y) - C
2676 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2677 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2678 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2679 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2680 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681 }
2682 // Hoist one-use subtraction from non-opaque constant:
2683 // (C - x) + y -> (y - x) + C
2684 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2685 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2686 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2687 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2688 }
2689
2690 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2691 // rather than 'add 0/-1' (the zext should get folded).
2692 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2693 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2694 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2696 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2697 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2698 }
2699
2700 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2701 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2702 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2703 if (TN->getVT() == MVT::i1) {
2704 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2705 DAG.getConstant(1, DL, VT));
2706 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2707 }
2708 }
2709
2710 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2711 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2712 N1.getResNo() == 0)
2713 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2714 N0, N1.getOperand(0), N1.getOperand(2));
2715
2716 // (add X, Carry) -> (addcarry X, 0, Carry)
2717 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2718 if (SDValue Carry = getAsCarry(TLI, N1))
2719 return DAG.getNode(ISD::ADDCARRY, DL,
2720 DAG.getVTList(VT, Carry.getValueType()), N0,
2721 DAG.getConstant(0, DL, VT), Carry);
2722
2723 return SDValue();
2724}
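
Several of the rewrites in visitADDLikeCommutative are plain modular-arithmetic identities. A minimal standalone sketch, assuming ordinary wrap-around uint32_t arithmetic (the values are arbitrary), that checks a few of them:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678u, Y = 0x9abcdef0u, C = 0xdeadbeefu;
  unsigned N = 5;
  // add x, shl(0 - y, n)  ==  sub x, shl(y, n)
  assert(X + ((0u - Y) << N) == X - (Y << N));
  // add (add x, 1), y  ==  sub y, (xor x, -1)
  assert((X + 1u) + Y == Y - (X ^ 0xffffffffu));
  // (x - C) + y  ==  (x + y) - C
  assert((X - C) + Y == (X + Y) - C);
  return 0;
}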
2725
2726SDValue DAGCombiner::visitADDC(SDNode *N) {
2727 SDValue N0 = N->getOperand(0);
2728 SDValue N1 = N->getOperand(1);
2729 EVT VT = N0.getValueType();
2730 SDLoc DL(N);
2731
2732 // If the flag result is dead, turn this into an ADD.
2733 if (!N->hasAnyUseOfValue(1))
2734 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736
2737 // canonicalize constant to RHS.
2738 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2739 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2740 if (N0C && !N1C)
2741 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742
2743 // fold (addc x, 0) -> x + no carry out
2744 if (isNullConstant(N1))
2745 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2746 DL, MVT::Glue));
2747
2748 // If it cannot overflow, transform into an add.
2749 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2750 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752
2753 return SDValue();
2754}
2755
2756/**
2757 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2758 * then the flip also occurs if computing the inverse is the same cost.
2759 * This function returns an empty SDValue in case it cannot flip the boolean
2760 * without increasing the cost of the computation. If you want to flip a boolean
2761 * no matter what, use DAG.getLogicalNOT.
2762 */
2763static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2764 const TargetLowering &TLI,
2765 bool Force) {
2766 if (Force && isa<ConstantSDNode>(V))
2767 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768
2769 if (V.getOpcode() != ISD::XOR)
2770 return SDValue();
2771
2772 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2773 if (!Const)
2774 return SDValue();
2775
2776 EVT VT = V.getValueType();
2777
2778 bool IsFlip = false;
2779 switch(TLI.getBooleanContents(VT)) {
2780 case TargetLowering::ZeroOrOneBooleanContent:
2781 IsFlip = Const->isOne();
2782 break;
2783 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2784 IsFlip = Const->isAllOnesValue();
2785 break;
2786 case TargetLowering::UndefinedBooleanContent:
2787 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2788 break;
2789 }
2790
2791 if (IsFlip)
2792 return V.getOperand(0);
2793 if (Force)
2794 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2795 return SDValue();
2796}
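
The flip itself is always an xor with the target's "true" constant; extractBooleanFlip only decides whether that xor is already present or is free to materialize. A small standalone sketch of the underlying identity for the two fixed boolean-contents conventions:

#include <cassert>
#include <cstdint>

int main() {
  // ZeroOrOneBooleanContent: NOT(b) == b ^ 1.
  for (uint32_t B : {0u, 1u})
    assert((B ^ 1u) == (B ? 0u : 1u));
  // ZeroOrNegativeOneBooleanContent: NOT(b) == b ^ -1 (bitwise complement).
  for (int32_t B : {0, -1})
    assert((B ^ -1) == (B ? 0 : -1));
  return 0;
}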
2797
2798SDValue DAGCombiner::visitADDO(SDNode *N) {
2799 SDValue N0 = N->getOperand(0);
2800 SDValue N1 = N->getOperand(1);
2801 EVT VT = N0.getValueType();
2802 bool IsSigned = (ISD::SADDO == N->getOpcode());
2803
2804 EVT CarryVT = N->getValueType(1);
2805 SDLoc DL(N);
2806
2807 // If the flag result is dead, turn this into an ADD.
2808 if (!N->hasAnyUseOfValue(1))
2809 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2810 DAG.getUNDEF(CarryVT));
2811
2812 // canonicalize constant to RHS.
2813 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2814 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2815 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816
2817 // fold (addo x, 0) -> x + no carry out
2818 if (isNullOrNullSplat(N1))
2819 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820
2821 if (!IsSigned) {
2822 // If it cannot overflow, transform into an add.
2823 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2824 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2825 DAG.getConstant(0, DL, CarryVT));
2826
2827 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2828 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2829 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2830 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2831 return CombineTo(
2832 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2833 }
2834
2835 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2836 return Combined;
2837
2838 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2839 return Combined;
2840 }
2841
2842 return SDValue();
2843}
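
The (uaddo (xor a, -1), 1) -> (usubo 0, a) rewrite works because ~a + 1 equals 0 - a in wrap-around arithmetic, while the carry-out of the add is the logical inverse of the borrow of the sub, which is why the carry is flipped. A standalone sketch, not part of DAGCombiner, checking this exhaustively for 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    uint8_t NotA = ~static_cast<uint8_t>(A);
    // uaddo: result and carry-out of NotA + 1.
    uint8_t AddRes = static_cast<uint8_t>(NotA + 1);
    bool AddCarry = (NotA + 1u) > 0xffu;
    // usubo: result and borrow of 0 - A.
    uint8_t SubRes = static_cast<uint8_t>(0u - A);
    bool SubBorrow = A != 0;
    assert(AddRes == SubRes);        // same value
    assert(AddCarry == !SubBorrow);  // carry is the flipped borrow
  }
  return 0;
}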
2844
2845SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2846 EVT VT = N0.getValueType();
2847 if (VT.isVector())
2848 return SDValue();
2849
2850 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2851 // If Y + 1 cannot overflow.
2852 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2853 SDValue Y = N1.getOperand(0);
2854 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2856 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2857 N1.getOperand(2));
2858 }
2859
2860 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2862 if (SDValue Carry = getAsCarry(TLI, N1))
2863 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2864 DAG.getConstant(0, SDLoc(N), VT), Carry);
2865
2866 return SDValue();
2867}
2868
2869SDValue DAGCombiner::visitADDE(SDNode *N) {
2870 SDValue N0 = N->getOperand(0);
2871 SDValue N1 = N->getOperand(1);
2872 SDValue CarryIn = N->getOperand(2);
2873
2874 // canonicalize constant to RHS
2875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877 if (N0C && !N1C)
2878 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2879 N1, N0, CarryIn);
2880
2881 // fold (adde x, y, false) -> (addc x, y)
2882 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2883 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884
2885 return SDValue();
2886}
2887
2888SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2889 SDValue N0 = N->getOperand(0);
2890 SDValue N1 = N->getOperand(1);
2891 SDValue CarryIn = N->getOperand(2);
2892 SDLoc DL(N);
2893
2894 // canonicalize constant to RHS
2895 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897 if (N0C && !N1C)
2898 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899
2900 // fold (addcarry x, y, false) -> (uaddo x, y)
2901 if (isNullConstant(CarryIn)) {
2902 if (!LegalOperations ||
2903 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2904 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2905 }
2906
2907 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2908 if (isNullConstant(N0) && isNullConstant(N1)) {
2909 EVT VT = N0.getValueType();
2910 EVT CarryVT = CarryIn.getValueType();
2911 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2912 AddToWorklist(CarryExt.getNode());
2913 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2914 DAG.getConstant(1, DL, VT)),
2915 DAG.getConstant(0, DL, CarryVT));
2916 }
2917
2918 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2919 return Combined;
2920
2921 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2922 return Combined;
2923
2924 return SDValue();
2925}
2926
2927SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2928 SDValue N0 = N->getOperand(0);
2929 SDValue N1 = N->getOperand(1);
2930 SDValue CarryIn = N->getOperand(2);
2931 SDLoc DL(N);
2932
2933 // canonicalize constant to RHS
2934 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2935 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2936 if (N0C && !N1C)
2937 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938
2939 // fold (saddo_carry x, y, false) -> (saddo x, y)
2940 if (isNullConstant(CarryIn)) {
2941 if (!LegalOperations ||
2942 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2943 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2944 }
2945
2946 return SDValue();
2947}
2948
2949/**
2950 * If we are facing some sort of diamond carry propagation pattern try to
2951 * break it up to generate something like:
2952 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953 *
2954 * The end result is usually an increase in operations required, but because the
2955 * carry is now linearized, other transforms can kick in and optimize the DAG.
2956 *
2957 * Patterns typically look something like
2958 *              (uaddo A, B)
2959 *               /       \
2960 *            Carry      Sum
2961 *              |          \
2962 *              |           (addcarry *, 0, Z)
2963 *              |            /
2964 *               \          Carry
2965 *                |         /
2966 *                (addcarry X, *, *)
2967 *
2968 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2969 * produce a combine with a single path for carry propagation.
2970 */
2971static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2972 SDValue X, SDValue Carry0, SDValue Carry1,
2973 SDNode *N) {
2974 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2975 return SDValue();
2976 if (Carry1.getOpcode() != ISD::UADDO)
2977 return SDValue();
2978
2979 SDValue Z;
2980
2981 /**
2982 * First look for a suitable Z. It will present itself in the form of
2983 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984 */
2985 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2986 isNullConstant(Carry0.getOperand(1))) {
2987 Z = Carry0.getOperand(2);
2988 } else if (Carry0.getOpcode() == ISD::UADDO &&
2989 isOneConstant(Carry0.getOperand(1))) {
2990 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2991 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2992 } else {
2993 // We couldn't find a suitable Z.
2994 return SDValue();
2995 }
2996
2997
2998 auto cancelDiamond = [&](SDValue A,SDValue B) {
2999 SDLoc DL(N);
3000 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3001 Combiner.AddToWorklist(NewY.getNode());
3002 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3003 DAG.getConstant(0, DL, X.getValueType()),
3004 NewY.getValue(1));
3005 };
3006
3007 /**
3008 * (uaddo A, B)
3009 * |
3010 * Sum
3011 * |
3012 * (addcarry *, 0, Z)
3013 */
3014 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3015 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3016 }
3017
3018 /**
3019 * (addcarry A, 0, Z)
3020 * |
3021 * Sum
3022 * |
3023 * (uaddo *, B)
3024 */
3025 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3026 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3027 }
3028
3029 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3030 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3031 }
3032
3033 return SDValue();
3034}
3035
3036// If we are facing some sort of diamond carry/borrow in/out pattern try to
3037// match patterns like:
3038//
3039//      (uaddo A, B)            CarryIn
3040//       |        \                |
3041//       |         \               |
3042//   PartialSum   PartialCarryOutX  /
3043//       |              |          /
3044//       |         _____|_________/
3045//       |        /     |
3046//   (uaddo *, *)        \________
3047//       |     \                  \
3048//       |      \                  |
3049//       |   PartialCarryOutY      |
3050//       |            \            |
3051//       |             \           /
3052//   AddCarrySum        |    _____/
3053//                      |   /
3054//          CarryOut = (or *, *)
3055//
3056// And generate ADDCARRY (or SUBCARRY) with two result values:
3057//
3058// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059//
3060// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3061// a single path for carry/borrow out propagation:
3062static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3063 const TargetLowering &TLI, SDValue Carry0,
3064 SDValue Carry1, SDNode *N) {
3065 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3066 return SDValue();
3067 unsigned Opcode = Carry0.getOpcode();
3068 if (Opcode != Carry1.getOpcode())
3069 return SDValue();
3070 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3071 return SDValue();
3072
3073 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3074 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3075 // the above ASCII art.)
3076 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3077 Carry1.getOperand(1) != Carry0.getValue(0))
3078 std::swap(Carry0, Carry1);
3079 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3080 Carry1.getOperand(1) != Carry0.getValue(0))
3081 return SDValue();
3082
3083 // The carry-in value must be on the right-hand side for subtraction.
3084 unsigned CarryInOperandNum =
3085 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3086 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3087 return SDValue();
3088 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089
3090 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3091 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3092 return SDValue();
3093
3094 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3095 // TODO: make getAsCarry() aware of how partial carries are merged.
3096 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3097 return SDValue();
3098 CarryIn = CarryIn.getOperand(0);
3099 if (CarryIn.getValueType() != MVT::i1)
3100 return SDValue();
3101
3102 SDLoc DL(N);
3103 SDValue Merged =
3104 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3105 Carry0.getOperand(1), CarryIn);
3106
3107 // Note that because the result of the UADDO/USUBO of A and B feeds into the
3108 // UADDO/USUBO that adds the carry/borrow in, if the first UADDO/USUBO
3109 // overflows then the second one cannot overflow.
3110 // For example, consider 8-bit numbers where 0xFF is the
3111 // maximum value.
3112 //
3113 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3114 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115 //
3116 // This is important because it means that OR and XOR can be used to merge
3117 // carry flags; and that AND can return a constant zero.
3118 //
3119 // TODO: match other operations that can merge flags (ADD, etc)
3120 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3121 if (N->getOpcode() == ISD::AND)
3122 return DAG.getConstant(0, DL, MVT::i1);
3123 return Merged.getValue(1);
3124}
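
The claim in the comment above -- the two partial carries can never both be set, so OR (or XOR) merges them into the carry of the combined addcarry and AND is a constant zero -- can be checked exhaustively for 8-bit operands. A standalone sketch, not part of the LLVM sources:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned CarryIn = 0; CarryIn < 2; ++CarryIn) {
        unsigned Partial = A + B;                    // first uaddo
        bool C1 = Partial > 0xff;
        unsigned Sum = (Partial & 0xff) + CarryIn;   // second uaddo
        bool C2 = Sum > 0xff;
        bool MergedCarry = (A + B + CarryIn) > 0xff; // what addcarry produces
        assert(!(C1 && C2));                 // never both set
        assert((C1 | C2) == MergedCarry);    // OR merges the carries
        assert((C1 ^ C2) == MergedCarry);    // so does XOR
        assert((C1 & C2) == false);          // AND is constant zero
      }
  return 0;
}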
3125
3126SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3127 SDNode *N) {
3128 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3129 if (isBitwiseNot(N0))
3130 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3131 SDLoc DL(N);
3132 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3133 N0.getOperand(0), NotC);
3134 return CombineTo(
3135 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3136 }
3137
3138 // Iff the flag result is dead:
3139 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3140 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3141 // or the dependency between the instructions.
3142 if ((N0.getOpcode() == ISD::ADD ||
3143 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3144 N0.getValue(1) != CarryIn)) &&
3145 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3146 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3147 N0.getOperand(0), N0.getOperand(1), CarryIn);
3148
3149 /**
3150 * When one of the addcarry arguments is itself a carry, we may be facing
3151 * a diamond carry propagation, in which case we try to transform the DAG
3152 * to ensure linear carry propagation if that is possible.
3153 */
3154 if (auto Y = getAsCarry(TLI, N1)) {
3155 // Because both are carries, Y and Z can be swapped.
3156 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3157 return R;
3158 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3159 return R;
3160 }
3161
3162 return SDValue();
3163}
3164
3165// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3166// clamp/truncation if necessary.
3167static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3168 SDValue RHS, SelectionDAG &DAG,
3169 const SDLoc &DL) {
3170 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&((void)0)
3171 "Illegal truncation")((void)0);
3172
3173 if (DstVT == SrcVT)
3174 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175
3176 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3177 // clamping RHS.
3178 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3179 DstVT.getScalarSizeInBits());
3180 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3181 return SDValue();
3182
3183 SDValue SatLimit =
3184 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3185 DstVT.getScalarSizeInBits()),
3186 DL, SrcVT);
3187 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3188 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3189 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3190 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3191}
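
The clamp works because once the upper bits of LHS are known to be zero, any RHS at or above the destination type's maximum already drives the saturating subtraction to zero, so replacing RHS by umin(RHS, SatLimit) changes nothing. A standalone sketch for a 16-bit source and an 8-bit destination type; usubsat8 and usubsat16 are hand-rolled stand-ins for the ISD::USUBSAT semantics:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint16_t usubsat16(uint16_t A, uint16_t B) { return A > B ? A - B : 0; }
static uint8_t usubsat8(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned LHS = 0; LHS < 256; ++LHS)           // LHS fits in 8 bits
    for (unsigned RHS = 0; RHS < 65536; RHS += 97) { // sample the 16-bit range
      uint16_t Wide = usubsat16(LHS, RHS);
      uint16_t Clamped = std::min<uint16_t>(RHS, 0xff); // umin(RHS, SatLimit)
      uint8_t Narrow = usubsat8(static_cast<uint8_t>(LHS),
                                static_cast<uint8_t>(Clamped));
      assert(static_cast<uint8_t>(Wide) == Narrow);
    }
  return 0;
}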
3192
3193// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3194// usubsat(a,b), optionally as a truncated type.
3195SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3196 if (N->getOpcode() != ISD::SUB ||
3197 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3198 return SDValue();
3199
3200 EVT SubVT = N->getValueType(0);
3201 SDValue Op0 = N->getOperand(0);
3202 SDValue Op1 = N->getOperand(1);
3203
3204 // Try to find umax(a,b) - b or a - umin(a,b) patterns, as
3205 // they may be converted to usubsat(a,b).
3206 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3207 SDValue MaxLHS = Op0.getOperand(0);
3208 SDValue MaxRHS = Op0.getOperand(1);
3209 if (MaxLHS == Op1)
3210 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3211 if (MaxRHS == Op1)
3212 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3213 }
3214
3215 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3216 SDValue MinLHS = Op1.getOperand(0);
3217 SDValue MinRHS = Op1.getOperand(1);
3218 if (MinLHS == Op0)
3219 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3220 if (MinRHS == Op0)
3221 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3222 }
3223
3224 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3225 if (Op1.getOpcode() == ISD::TRUNCATE &&
3226 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3227 Op1.getOperand(0).hasOneUse()) {
3228 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3229 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3230 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3231 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3232 DAG, SDLoc(N));
3233 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3234 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3235 DAG, SDLoc(N));
3236 }
3237
3238 return SDValue();
3239}
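
Both source patterns compute max(a - b, 0): umax(a, b) - b never wraps, and a - umin(a, b) subtracts at most a. A standalone sketch, not part of DAGCombiner, checking the scalar identities exhaustively on uint8_t; usubsat is a hand-rolled stand-in for ISD::USUBSAT:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t usubsat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t Max = std::max<uint8_t>(A, B);
      uint8_t Min = std::min<uint8_t>(A, B);
      // umax(a, b) - b  ==  usubsat(a, b)
      assert(static_cast<uint8_t>(Max - B) == usubsat(A, B));
      // a - umin(a, b)  ==  usubsat(a, b)
      assert(static_cast<uint8_t>(A - Min) == usubsat(A, B));
    }
  return 0;
}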
3240
3241// Since it may not be valid to emit a fold to zero for vector initializers
3242// check if we can before folding.
3243static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3244 SelectionDAG &DAG, bool LegalOperations) {
3245 if (!VT.isVector())
3246 return DAG.getConstant(0, DL, VT);
3247 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3248 return DAG.getConstant(0, DL, VT);
3249 return SDValue();
3250}
3251
3252SDValue DAGCombiner::visitSUB(SDNode *N) {
3253 SDValue N0 = N->getOperand(0);
3254 SDValue N1 = N->getOperand(1);
3255 EVT VT = N0.getValueType();
3256 SDLoc DL(N);
3257
3258 // fold vector ops
3259 if (VT.isVector()) {
3260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3261 return FoldedVOp;
3262
3263 // fold (sub x, 0) -> x, vector edition
3264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3265 return N0;
3266 }
3267
3268 // fold (sub x, x) -> 0
3269 // FIXME: Refactor this and xor and other similar operations together.
3270 if (N0 == N1)
3271 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3272
3273 // fold (sub c1, c2) -> c3
3274 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3275 return C;
3276
3277 if (SDValue NewSel = foldBinOpIntoSelect(N))
3278 return NewSel;
3279
3280 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281
3282 // fold (sub x, c) -> (add x, -c)
3283 if (N1C) {
3284 return DAG.getNode(ISD::ADD, DL, VT, N0,
3285 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3286 }
3287
3288 if (isNullOrNullSplat(N0)) {
3289 unsigned BitWidth = VT.getScalarSizeInBits();
3290 // Right-shifting everything out but the sign bit followed by negation is
3291 // the same as flipping arithmetic/logical shift type without the negation:
3292 // -(X >>u 31) -> (X >>s 31)
3293 // -(X >>s 31) -> (X >>u 31)
3294 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3295 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3296 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3297 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3298 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3299 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3300 }
3301 }
3302
3303 // 0 - X --> 0 if the sub is NUW.
3304 if (N->getFlags().hasNoUnsignedWrap())
3305 return N0;
3306
3307 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3308 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3309 // N1 must be 0 because negating the minimum signed value is undefined.
3310 if (N->getFlags().hasNoSignedWrap())
3311 return N0;
3312
3313 // 0 - X --> X if X is 0 or the minimum signed value.
3314 return N1;
3315 }
3316
3317 // Convert 0 - abs(x).
3318 SDValue Result;
3319 if (N1->getOpcode() == ISD::ABS &&
3320 !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3321 TLI.expandABS(N1.getNode(), Result, DAG, true))
3322 return Result;
3323
3324 // Fold neg(splat(neg(x))) -> splat(x)
3325 if (VT.isVector()) {
3326 SDValue N1S = DAG.getSplatValue(N1, true);
3327 if (N1S && N1S.getOpcode() == ISD::SUB &&
3328 isNullConstant(N1S.getOperand(0))) {
3329 if (VT.isScalableVector())
3330 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3331 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3332 }
3333 }
3334 }
3335
3336 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3337 if (isAllOnesOrAllOnesSplat(N0))
3338 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3339
3340 // fold (A - (0-B)) -> A+B
3341 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3342 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3343
3344 // fold A-(A-B) -> B
3345 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3346 return N1.getOperand(1);
3347
3348 // fold (A+B)-A -> B
3349 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3350 return N0.getOperand(1);
3351
3352 // fold (A+B)-B -> A
3353 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3354 return N0.getOperand(0);
3355
3356 // fold (A+C1)-C2 -> A+(C1-C2)
3357 if (N0.getOpcode() == ISD::ADD &&
3358 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3359 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3360 SDValue NewC =
3361 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3362 assert(NewC && "Constant folding failed")((void)0);
3363 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364 }
3365
3366 // fold C2-(A+C1) -> (C2-C1)-A
3367 if (N1.getOpcode() == ISD::ADD) {
3368 SDValue N11 = N1.getOperand(1);
3369 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3370 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3371 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3372 assert(NewC && "Constant folding failed")((void)0);
3373 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3374 }
3375 }
3376
3377 // fold (A-C1)-C2 -> A-(C1+C2)
3378 if (N0.getOpcode() == ISD::SUB &&
3379 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3380 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3381 SDValue NewC =
3382 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3383 assert(NewC && "Constant folding failed")((void)0);
3384 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385 }
3386
3387 // fold (c1-A)-c2 -> (c1-c2)-A
3388 if (N0.getOpcode() == ISD::SUB &&
3389 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3390 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3391 SDValue NewC =
3392 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3393 assert(NewC && "Constant folding failed")((void)0);
3394 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395 }
3396
3397 // fold ((A+(B+or-C))-B) -> A+or-C
3398 if (N0.getOpcode() == ISD::ADD &&
3399 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3400 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3401 N0.getOperand(1).getOperand(0) == N1)
3402 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3403 N0.getOperand(1).getOperand(1));
3404
3405 // fold ((A+(C+B))-B) -> A+C
3406 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3407 N0.getOperand(1).getOperand(1) == N1)
3408 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3409 N0.getOperand(1).getOperand(0));
3410
3411 // fold ((A-(B-C))-C) -> A-B
3412 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3413 N0.getOperand(1).getOperand(1) == N1)
3414 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3415 N0.getOperand(1).getOperand(0));
3416
3417 // fold (A-(B-C)) -> A+(C-B)
3418 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3419 return DAG.getNode(ISD::ADD, DL, VT, N0,
3420 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3421 N1.getOperand(0)));
3422
3423 // A - (A & B) -> A & (~B)
3424 if (N1.getOpcode() == ISD::AND) {
3425 SDValue A = N1.getOperand(0);
3426 SDValue B = N1.getOperand(1);
3427 if (A != N0)
3428 std::swap(A, B);
3429 if (A == N0 &&
3430 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3431 SDValue InvB =
3432 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3433 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3434 }
3435 }
3436
3437 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3438 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3439 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3440 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3441 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3442 N1.getOperand(0).getOperand(1),
3443 N1.getOperand(1));
3444 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3445 }
3446 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3447 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3448 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3449 N1.getOperand(0),
3450 N1.getOperand(1).getOperand(1));
3451 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3452 }
3453 }
3454
3455 // If either operand of a sub is undef, the result is undef
3456 if (N0.isUndef())
3457 return N0;
3458 if (N1.isUndef())
3459 return N1;
3460
3461 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3462 return V;
3463
3464 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3465 return V;
3466
3467 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3468 return V;
3469
3470 if (SDValue V = foldSubToUSubSat(VT, N))
3471 return V;
3472
3473 // (x - y) - 1 -> add (xor y, -1), x
3474 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3475 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3476 DAG.getAllOnesConstant(DL, VT));
3477 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478 }
3479
3480 // Look for:
3481 // sub y, (xor x, -1)
3482 // And if the target does not like this form then turn into:
3483 // add (add x, y), 1
3484 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3485 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3486 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487 }
3488
3489 // Hoist one-use addition by non-opaque constant:
3490 // (x + C) - y -> (x - y) + C
3491 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3492 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3493 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3494 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3495 }
3496 // y - (x + C) -> (y - x) - C
3497 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3498 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3499 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3500 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3501 }
3502 // (x - C) - y -> (x - y) - C
3503 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3504 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3505 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3506 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3507 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3508 }
3509 // (C - x) - y -> C - (x + y)
3510 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3511 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3512 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3513 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514 }
3515
3516 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3517 // rather than 'sub 0/1' (the sext should get folded).
3518 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3519 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3520 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3521 TLI.getBooleanContents(VT) ==
3522 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3523 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3524 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525 }
3526
3527 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3528 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3529 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3530 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3531 SDValue S0 = N1.getOperand(0);
3532 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3533 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3534 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3535 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3536 }
3537 }
3538
3539 // If the relocation model supports it, consider symbol offsets.
3540 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3541 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3542 // fold (sub Sym, c) -> Sym-c
3543 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3544 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3545 GA->getOffset() -
3546 (uint64_t)N1C->getSExtValue());
3547 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3548 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3549 if (GA->getGlobal() == GB->getGlobal())
3550 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3551 DL, VT);
3552 }
3553
3554 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3555 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3556 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3557 if (TN->getVT() == MVT::i1) {
3558 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3559 DAG.getConstant(1, DL, VT));
3560 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3561 }
3562 }
3563
3564 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3565 if (N1.getOpcode() == ISD::VSCALE) {
3566 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3567 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568 }
3569
3570 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3571 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3572 APInt NewStep = -N1.getConstantOperandAPInt(0);
3573 return DAG.getNode(ISD::ADD, DL, VT, N0,
3574 DAG.getStepVector(DL, VT, NewStep));
3575 }
3576
3577 // Prefer an add for more folding potential and possibly better codegen:
3578 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3579 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3580 SDValue ShAmt = N1.getOperand(1);
3581 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3582 if (ShAmtC &&
3583 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3584 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3585 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3586 }
3587 }
3588
3589 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3590 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3591 if (SDValue Carry = getAsCarry(TLI, N0)) {
3592 SDValue X = N1;
3593 SDValue Zero = DAG.getConstant(0, DL, VT);
3594 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3595 return DAG.getNode(ISD::ADDCARRY, DL,
3596 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3597 Carry);
3598 }
3599 }
3600
3601 return SDValue();
3602}
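
Most of the scalar rewrites in visitSUB are again wrap-around identities. A standalone sketch checking a handful of them; the last check assumes a two's-complement target where >> on a negative signed value is an arithmetic shift, which is what ISD::SRA models:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x01234567u, B = 0xfedcba98u;
  // fold (A - (0-B)) -> A+B
  assert(A - (0u - B) == A + B);
  // fold A-(A-B) -> B
  assert(A - (A - B) == B);
  // fold (A+B)-B -> A
  assert((A + B) - B == A);
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  assert(0xffffffffu - A == (A ^ 0xffffffffu));
  // -(X >>u 31) -> (X >>s 31) when only the sign bit is shifted down.
  int32_t X = -123456;
  assert(-static_cast<int32_t>(static_cast<uint32_t>(X) >> 31) == (X >> 31));
  return 0;
}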
3603
3604SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3605 SDValue N0 = N->getOperand(0);
3606 SDValue N1 = N->getOperand(1);
3607 EVT VT = N0.getValueType();
3608 SDLoc DL(N);
3609
3610 // fold vector ops
3611 if (VT.isVector()) {
3612 // TODO SimplifyVBinOp
3613
3614 // fold (sub_sat x, 0) -> x, vector edition
3615 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3616 return N0;
3617 }
3618
3619 // fold (sub_sat x, undef) -> 0
3620 if (N0.isUndef() || N1.isUndef())
3621 return DAG.getConstant(0, DL, VT);
3622
3623 // fold (sub_sat x, x) -> 0
3624 if (N0 == N1)
3625 return DAG.getConstant(0, DL, VT);
3626
3627 // fold (sub_sat c1, c2) -> c3
3628 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3629 return C;
3630
3631 // fold (sub_sat x, 0) -> x
3632 if (isNullConstant(N1))
3633 return N0;
3634
3635 return SDValue();
3636}
3637
3638SDValue DAGCombiner::visitSUBC(SDNode *N) {
3639 SDValue N0 = N->getOperand(0);
3640 SDValue N1 = N->getOperand(1);
3641 EVT VT = N0.getValueType();
3642 SDLoc DL(N);
3643
3644 // If the flag result is dead, turn this into an SUB.
3645 if (!N->hasAnyUseOfValue(1))
3646 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3647 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3648
3649 // fold (subc x, x) -> 0 + no borrow
3650 if (N0 == N1)
3651 return CombineTo(N, DAG.getConstant(0, DL, VT),
3652 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3653
3654 // fold (subc x, 0) -> x + no borrow
3655 if (isNullConstant(N1))
3656 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657
3658 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3659 if (isAllOnesConstant(N0))
3660 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3661 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3662
3663 return SDValue();
3664}
3665
3666SDValue DAGCombiner::visitSUBO(SDNode *N) {
3667 SDValue N0 = N->getOperand(0);
3668 SDValue N1 = N->getOperand(1);
3669 EVT VT = N0.getValueType();
3670 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3671
3672 EVT CarryVT = N->getValueType(1);
3673 SDLoc DL(N);
3674
3675 // If the flag result is dead, turn this into an SUB.
3676 if (!N->hasAnyUseOfValue(1))
3677 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3678 DAG.getUNDEF(CarryVT));
3679
3680 // fold (subo x, x) -> 0 + no borrow
3681 if (N0 == N1)
3682 return CombineTo(N, DAG.getConstant(0, DL, VT),
3683 DAG.getConstant(0, DL, CarryVT));
3684
3685 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3686
3687 // fold (subo x, c) -> (addo x, -c)
3688 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3689 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3690 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691 }
3692
3693 // fold (subo x, 0) -> x + no borrow
3694 if (isNullOrNullSplat(N1))
3695 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3696
3697 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3698 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3699 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3700 DAG.getConstant(0, DL, CarryVT));
3701
3702 return SDValue();
3703}
3704
3705SDValue DAGCombiner::visitSUBE(SDNode *N) {
3706 SDValue N0 = N->getOperand(0);
3707 SDValue N1 = N->getOperand(1);
3708 SDValue CarryIn = N->getOperand(2);
3709
3710 // fold (sube x, y, false) -> (subc x, y)
3711 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3712 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3713
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 SDValue CarryIn = N->getOperand(2);
3721
3722 // fold (subcarry x, y, false) -> (usubo x, y)
3723 if (isNullConstant(CarryIn)) {
3724 if (!LegalOperations ||
3725 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3726 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727 }
3728
3729 return SDValue();
3730}
3731
3732SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3733 SDValue N0 = N->getOperand(0);
3734 SDValue N1 = N->getOperand(1);
3735 SDValue CarryIn = N->getOperand(2);
3736
3737 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3738 if (isNullConstant(CarryIn)) {
3739 if (!LegalOperations ||
3740 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3741 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742 }
3743
3744 return SDValue();
3745}
3746
3747// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3748// UMULFIXSAT here.
3749SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3750 SDValue N0 = N->getOperand(0);
3751 SDValue N1 = N->getOperand(1);
3752 SDValue Scale = N->getOperand(2);
3753 EVT VT = N0.getValueType();
3754
3755 // fold (mulfix x, undef, scale) -> 0
3756 if (N0.isUndef() || N1.isUndef())
3757 return DAG.getConstant(0, SDLoc(N), VT);
3758
3759 // Canonicalize constant to RHS (vector doesn't have to splat)
3760 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3761 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3762 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3763
3764 // fold (mulfix x, 0, scale) -> 0
3765 if (isNullConstant(N1))
3766 return DAG.getConstant(0, SDLoc(N), VT);
3767
3768 return SDValue();
3769}
3770
3771SDValue DAGCombiner::visitMUL(SDNode *N) {
3772 SDValue N0 = N->getOperand(0);
3773 SDValue N1 = N->getOperand(1);
3774 EVT VT = N0.getValueType();
3775
3776 // fold (mul x, undef) -> 0
3777 if (N0.isUndef() || N1.isUndef())
3778 return DAG.getConstant(0, SDLoc(N), VT);
3779
3780 bool N1IsConst = false;
3781 bool N1IsOpaqueConst = false;
3782 APInt ConstValue1;
3783
3784 // fold vector ops
3785 if (VT.isVector()) {
3786 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3787 return FoldedVOp;
3788
3789 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3790 assert((!N1IsConst ||((void)0)
3791 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&((void)0)
3792 "Splat APInt should be element width")((void)0);
3793 } else {
3794 N1IsConst = isa<ConstantSDNode>(N1);
3795 if (N1IsConst) {
3796 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3797 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3798 }
3799 }
3800
3801 // fold (mul c1, c2) -> c1*c2
3802 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3803 return C;
3804
3805 // canonicalize constant to RHS (vector doesn't have to splat)
3806 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3807 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3808 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3809
3810 // fold (mul x, 0) -> 0
3811 if (N1IsConst && ConstValue1.isNullValue())
3812 return N1;
3813
3814 // fold (mul x, 1) -> x
3815 if (N1IsConst && ConstValue1.isOneValue())
3816 return N0;
3817
3818 if (SDValue NewSel = foldBinOpIntoSelect(N))
3819 return NewSel;
3820
3821 // fold (mul x, -1) -> 0-x
3822 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3823 SDLoc DL(N);
3824 return DAG.getNode(ISD::SUB, DL, VT,
3825 DAG.getConstant(0, DL, VT), N0);
3826 }
3827
3828 // fold (mul x, (1 << c)) -> x << c
3829 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830 DAG.isKnownToBeAPowerOfTwo(N1) &&
3831 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3832 SDLoc DL(N);
3833 SDValue LogBase2 = BuildLogBase2(N1, DL);
3834 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837 }
3838
3839 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3840 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3841 unsigned Log2Val = (-ConstValue1).logBase2();
3842 SDLoc DL(N);
3843 // FIXME: If the input is something that is easily negated (e.g. a
3844 // single-use add), we should put the negate there.
3845 return DAG.getNode(ISD::SUB, DL, VT,
3846 DAG.getConstant(0, DL, VT),
3847 DAG.getNode(ISD::SHL, DL, VT, N0,
3848 DAG.getConstant(Log2Val, DL,
3849 getShiftAmountTy(N0.getValueType()))));
3850 }
3851
3852 // Try to transform:
3853 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3854 // mul x, (2^N + 1) --> add (shl x, N), x
3855 // mul x, (2^N - 1) --> sub (shl x, N), x
3856 // Examples: x * 33 --> (x << 5) + x
3857 // x * 15 --> (x << 4) - x
3858 // x * -33 --> -((x << 5) + x)
3859 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3860 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3861 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3862 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3863 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3864 // x * 0xf800 --> (x << 16) - (x << 11)
3865 // x * -0x8800 --> -((x << 15) + (x << 11))
3866 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3867 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3868 // TODO: We could handle more general decomposition of any constant by
3869 // having the target set a limit on number of ops and making a
3870 // callback to determine that sequence (similar to sqrt expansion).
3871 unsigned MathOp = ISD::DELETED_NODE;
3872 APInt MulC = ConstValue1.abs();
3873 // The constant `2` should be treated as (2^0 + 1).
3874 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3875 MulC.lshrInPlace(TZeros);
3876 if ((MulC - 1).isPowerOf2())
3877 MathOp = ISD::ADD;
3878 else if ((MulC + 1).isPowerOf2())
3879 MathOp = ISD::SUB;
3880
3881 if (MathOp != ISD::DELETED_NODE) {
3882 unsigned ShAmt =
3883 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3884 ShAmt += TZeros;
3885 assert(ShAmt < VT.getScalarSizeInBits() &&((void)0)
3886 "multiply-by-constant generated out of bounds shift")((void)0);
3887 SDLoc DL(N);
3888 SDValue Shl =
3889 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3890 SDValue R =
3891 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3892 DAG.getNode(ISD::SHL, DL, VT, N0,
3893 DAG.getConstant(TZeros, DL, VT)))
3894 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3895 if (ConstValue1.isNegative())
3896 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3897 return R;
3898 }
3899 }
3900
3901 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3902 if (N0.getOpcode() == ISD::SHL &&
3903 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3904 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3905 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3906 if (isConstantOrConstantVector(C3))
3907 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908 }
3909
3910 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3911 // use.
3912 {
3913 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3914
3915 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3916 if (N0.getOpcode() == ISD::SHL &&
3917 isConstantOrConstantVector(N0.getOperand(1)) &&
3918 N0.getNode()->hasOneUse()) {
3919 Sh = N0; Y = N1;
3920 } else if (N1.getOpcode() == ISD::SHL &&
3921 isConstantOrConstantVector(N1.getOperand(1)) &&
3922 N1.getNode()->hasOneUse()) {
3923 Sh = N1; Y = N0;
3924 }
3925
3926 if (Sh.getNode()) {
3927 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3928 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3929 }
3930 }
3931
3932 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3933 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3934 N0.getOpcode() == ISD::ADD &&
3935 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3936 isMulAddWithConstProfitable(N, N0, N1))
3937 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3938 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3939 N0.getOperand(0), N1),
3940 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3941 N0.getOperand(1), N1));
3942
3943 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3944 if (N0.getOpcode() == ISD::VSCALE)
3945 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3946 const APInt &C0 = N0.getConstantOperandAPInt(0);
3947 const APInt &C1 = NC1->getAPIntValue();
3948 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949 }
3950
3951 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3952 APInt MulVal;
3953 if (N0.getOpcode() == ISD::STEP_VECTOR)
3954 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3955 const APInt &C0 = N0.getConstantOperandAPInt(0);
3956 APInt NewStep = C0 * MulVal;
3957 return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958 }
3959
3960 // Fold (mul x, 0/undef) -> 0 and
3961 // (mul x, 1) -> x
3962 // into and(x, mask):
3963 // We can replace vectors with '0' and '1' factors with a clearing mask.
3964 if (VT.isFixedLengthVector()) {
3965 unsigned NumElts = VT.getVectorNumElements();
3966 SmallBitVector ClearMask;
3967 ClearMask.reserve(NumElts);
3968 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3969 if (!V || V->isNullValue()) {
3970 ClearMask.push_back(true);
3971 return true;
3972 }
3973 ClearMask.push_back(false);
3974 return V->isOne();
3975 };
3976 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3977 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3978 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector")((void)0);
3979 SDLoc DL(N);
3980 EVT LegalSVT = N1.getOperand(0).getValueType();
3981 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3982 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3983 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3984 for (unsigned I = 0; I != NumElts; ++I)
3985 if (ClearMask[I])
3986 Mask[I] = Zero;
3987 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3988 }
3989 }
3990
3991 // reassociate mul
3992 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3993 return RMUL;
3994
3995 return SDValue();
3996}
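
The shift-and-add/sub decompositions listed in the comment inside visitMUL are exact in modular arithmetic, including the negated-constant cases. A standalone sketch, not part of the LLVM sources, that checks the examples from that comment on uint32_t:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0xdeadbeefu, 0x80000001u}) {
    // mul x, (2^N + 1) --> add (shl x, N), x            e.g. x * 33
    assert(X * 33u == (X << 5) + X);
    // mul x, (2^N - 1) --> sub (shl x, N), x            e.g. x * 15
    assert(X * 15u == (X << 4) - X);
    // mul x, (2^N + 2^M) --> add (shl x, N), (shl x, M) e.g. x * 0x8800
    assert(X * 0x8800u == (X << 15) + (X << 11));
    // mul x, (2^N - 2^M) --> sub (shl x, N), (shl x, M) e.g. x * 0xf800
    assert(X * 0xf800u == (X << 16) - (X << 11));
    // negative multipliers negate the result: x * -33 --> 0 - ((x << 5) + x)
    assert(X * 0xffffffdfu == 0u - ((X << 5) + X));
  }
  return 0;
}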
3997
3998/// Return true if divmod libcall is available.
3999static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4000 const TargetLowering &TLI) {
4001 RTLIB::Libcall LC;
4002 EVT NodeType = Node->getValueType(0);
4003 if (!NodeType.isSimple())
4004 return false;
4005 switch (NodeType.getSimpleVT().SimpleTy) {
4006 default: return false; // No libcall for vector types.
4007 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4008 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4009 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4010 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4011 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012 }
4013
4014 return TLI.getLibcallName(LC) != nullptr;
4015}
4016
4017/// Issue divrem if both quotient and remainder are needed.
4018SDValue DAGCombiner::useDivRem(SDNode *Node) {
4019 if (Node->use_empty())
4020 return SDValue(); // This is a dead node, leave it alone.
4021
4022 unsigned Opcode = Node->getOpcode();
4023 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4024 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4025
4026 // DivMod lib calls can still work on non-legal types if using lib-calls.
4027 EVT VT = Node->getValueType(0);
4028 if (VT.isVector() || !VT.isInteger())
4029 return SDValue();
4030
4031 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4032 return SDValue();
4033
4034 // If DIVREM is going to get expanded into a libcall,
4035 // but there is no libcall available, then don't combine.
4036 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4037 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4038 return SDValue();
4039
4040 // If div is legal, it's better to do the normal expansion
4041 unsigned OtherOpcode = 0;
4042 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4043 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4044 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4045 return SDValue();
4046 } else {
4047 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4048 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4049 return SDValue();
4050 }
4051
4052 SDValue Op0 = Node->getOperand(0);
4053 SDValue Op1 = Node->getOperand(1);
4054 SDValue combined;
4055 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4056 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4057 SDNode *User = *UI;
4058 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4059 User->use_empty())
4060 continue;
4061 // Convert the other matching node(s), too;
4062 // otherwise, the DIVREM may get target-legalized into something
4063 // target-specific that we won't be able to recognize.
4064 unsigned UserOpc = User->getOpcode();
4065 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4066 User->getOperand(0) == Op0 &&
4067 User->getOperand(1) == Op1) {
4068 if (!combined) {
4069 if (UserOpc == OtherOpcode) {
4070 SDVTList VTs = DAG.getVTList(VT, VT);
4071 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4072 } else if (UserOpc == DivRemOpc) {
4073 combined = SDValue(User, 0);
4074 } else {
4075 assert(UserOpc == Opcode)((void)0);
4076 continue;
4077 }
4078 }
4079 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4080 CombineTo(User, combined);
4081 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4082 CombineTo(User, combined.getValue(1));
4083 }
4084 }
4085 return combined;
4086}
4087
4088static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4089 SDValue N0 = N->getOperand(0);
4090 SDValue N1 = N->getOperand(1);
4091 EVT VT = N->getValueType(0);
4092 SDLoc DL(N);
4093
4094 unsigned Opc = N->getOpcode();
4095 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4096 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4097
4098 // X / undef -> undef
4099 // X % undef -> undef
4100 // X / 0 -> undef
4101 // X % 0 -> undef
4102 // NOTE: This includes vectors where any divisor element is zero/undef.
4103 if (DAG.isUndef(Opc, {N0, N1}))
4104 return DAG.getUNDEF(VT);
4105
4106 // undef / X -> 0
4107 // undef % X -> 0
4108 if (N0.isUndef())
4109 return DAG.getConstant(0, DL, VT);
4110
4111 // 0 / X -> 0
4112 // 0 % X -> 0
4113 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4114 if (N0C && N0C->isNullValue())
4115 return N0;
4116
4117 // X / X -> 1
4118 // X % X -> 0
4119 if (N0 == N1)
4120 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4121
4122 // X / 1 -> X
4123 // X % 1 -> 0
4124 // If this is a boolean op (single-bit element type), we can't have
4125 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4126 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4127 // it's a 1.
4128 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4129 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4130
4131 return SDValue();
4132}
4133
4134SDValue DAGCombiner::visitSDIV(SDNode *N) {
4135 SDValue N0 = N->getOperand(0);
4136 SDValue N1 = N->getOperand(1);
4137 EVT VT = N->getValueType(0);
4138 EVT CCVT = getSetCCResultType(VT);
4139
4140 // fold vector ops
4141 if (VT.isVector())
4142 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4143 return FoldedVOp;
4144
4145 SDLoc DL(N);
4146
4147 // fold (sdiv c1, c2) -> c1/c2
4148 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4149 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4150 return C;
4151
4152 // fold (sdiv X, -1) -> 0-X
4153 if (N1C && N1C->isAllOnesValue())
4154 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4155
4156 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4157 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4158 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4159 DAG.getConstant(1, DL, VT),
4160 DAG.getConstant(0, DL, VT));
4161
4162 if (SDValue V = simplifyDivRem(N, DAG))
4163 return V;
4164
4165 if (SDValue NewSel = foldBinOpIntoSelect(N))
4166 return NewSel;
4167
4168 // If we know the sign bits of both operands are zero, strength reduce to a
4169 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4170 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4171 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4172
4173 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4174 // If the corresponding remainder node exists, update its users with
4175 // (Dividend - (Quotient * Divisor)).
4176 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4177 { N0, N1 })) {
4178 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4179 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4180 AddToWorklist(Mul.getNode());
4181 AddToWorklist(Sub.getNode());
4182 CombineTo(RemNode, Sub);
4183 }
4184 return V;
4185 }
4186
4187 // sdiv, srem -> sdivrem
4188 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4189 // true. Otherwise, we break the simplification logic in visitREM().
4190 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4191 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4192 if (SDValue DivRem = useDivRem(N))
4193 return DivRem;
4194
4195 return SDValue();
4196}
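
The remainder update above uses the defining relation of integer division, Dividend == Quotient * Divisor + Remainder, so an existing SREM user can be rewritten as Dividend - Quotient * Divisor once the quotient has been combined. A standalone sketch checking that relation against C++'s truncating '/' and '%':

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t N0 : {0, 1, -1, 17, -17, 100, -100}) {
    for (int32_t N1 : {1, -1, 3, -3, 7}) {
      int32_t Quotient = N0 / N1;
      assert(N0 % N1 == N0 - Quotient * N1);
    }
  }
  return 0;
}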
4197
4198SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4199 SDLoc DL(N);
4200 EVT VT = N->getValueType(0);
4201 EVT CCVT = getSetCCResultType(VT);
4202 unsigned BitWidth = VT.getScalarSizeInBits();
4203
4204 // Helper for determining whether a value is a power-2 constant scalar or a
4205 // vector of such elements.
4206 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4207 if (C->isNullValue() || C->isOpaque())
4208 return false;
4209 if (C->getAPIntValue().isPowerOf2())
4210 return true;
4211 if ((-C->getAPIntValue()).isPowerOf2())
4212 return true;
4213 return false;
4214 };
4215
4216 // fold (sdiv X, pow2) -> simple ops after legalize
4217 // FIXME: We check for the exact bit here because the generic lowering gives
4218 // better results in that case. The target-specific lowering should learn how
4219 // to handle exact sdivs efficiently.
4220 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4221 // Target-specific implementation of sdiv x, pow2.
4222 if (SDValue Res = BuildSDIVPow2(N))
4223 return Res;
4224
4225 // Create constants that are functions of the shift amount value.
4226 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4227 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4228 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4229 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4230 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4231 if (!isConstantOrConstantVector(Inexact))
4232 return SDValue();
4233
4234 // Splat the sign bit into the register
4235 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4236 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4237 AddToWorklist(Sign.getNode());
4238
4239 // Add (N0 < 0) ? abs2 - 1 : 0;
4240 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4241 AddToWorklist(Srl.getNode());
4242 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4243 AddToWorklist(Add.getNode());
4244 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4245 AddToWorklist(Sra.getNode());
4246
4247 // Special case: (sdiv X, 1) -> X
4248 // Special Case: (sdiv X, -1) -> 0-X
4249 SDValue One = DAG.getConstant(1, DL, VT);
4250 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4251 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4252 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4253 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4254 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4255
4256 // If dividing by a positive value, we're done. Otherwise, the result must
4257 // be negated.
4258 SDValue Zero = DAG.getConstant(0, DL, VT);
4259 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4260
4261 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4262 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4263 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4264 return Res;
4265 }
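// Illustrative sketch of the sequence above, assuming i32 operands and a
// divisor of 8 (so C1 = 3 and Inexact = 29):
//   Sign = N0 >> 31          // arithmetic; all-ones if N0 < 0, else 0
//   Srl  = Sign >>u 29       // 7 if N0 < 0, else 0
//   Add  = N0 + Srl          // bias negative dividends toward zero
//   Sra  = Add >> 3          // arithmetic; quotient rounded toward zero
// The selects below handle divisors of 1 and -1 and negate the result when
// the divisor is negative.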
4266
4267 // If integer divide is expensive and we satisfy the requirements, emit an
4268 // alternate sequence. Targets may check function attributes for size/speed
4269 // trade-offs.
4270 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4271 if (isConstantOrConstantVector(N1) &&
4272 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4273 if (SDValue Op = BuildSDIV(N))
4274 return Op;
4275
4276 return SDValue();
4277}
4278
4279SDValue DAGCombiner::visitUDIV(SDNode *N) {
4280 SDValue N0 = N->getOperand(0);
4281 SDValue N1 = N->getOperand(1);
4282 EVT VT = N->getValueType(0);
4283 EVT CCVT = getSetCCResultType(VT);
4284
4285 // fold vector ops
4286 if (VT.isVector())
4287 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4288 return FoldedVOp;
4289
4290 SDLoc DL(N);
4291
4292 // fold (udiv c1, c2) -> c1/c2
4293 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4294 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4295 return C;
4296
4297 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4298 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4299 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4300 DAG.getConstant(1, DL, VT),
4301 DAG.getConstant(0, DL, VT));
4302
4303 if (SDValue V = simplifyDivRem(N, DAG))
4304 return V;
4305
4306 if (SDValue NewSel = foldBinOpIntoSelect(N))
4307 return NewSel;
4308
4309 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4310 // If the corresponding remainder node exists, update its users with
4311 // (Dividend - (Quotient * Divisor)).
4312 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4313 { N0, N1 })) {
4314 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4315 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4316 AddToWorklist(Mul.getNode());
4317 AddToWorklist(Sub.getNode());
4318 CombineTo(RemNode, Sub);
4319 }
4320 return V;
4321 }
4322
4323 // udiv, urem -> udivrem
4324 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4325 // true. Otherwise, we break the simplification logic in visitREM().
4326 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4327 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4328 if (SDValue DivRem = useDivRem(N))
4329 return DivRem;
4330
4331 return SDValue();
4332}
4333
4334SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4335 SDLoc DL(N);
4336 EVT VT = N->getValueType(0);
4337
4338 // fold (udiv x, (1 << c)) -> x >>u c
4339 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4340 DAG.isKnownToBeAPowerOfTwo(N1)) {
4341 SDValue LogBase2 = BuildLogBase2(N1, DL);
4342 AddToWorklist(LogBase2.getNode());
4343
4344 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4345 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4346 AddToWorklist(Trunc.getNode());
4347 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4348 }
4349
4350 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4351 if (N1.getOpcode() == ISD::SHL) {
4352 SDValue N10 = N1.getOperand(0);
4353 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4354 DAG.isKnownToBeAPowerOfTwo(N10)) {
4355 SDValue LogBase2 = BuildLogBase2(N10, DL);
4356 AddToWorklist(LogBase2.getNode());
4357
4358 EVT ADDVT = N1.getOperand(1).getValueType();
4359 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4360 AddToWorklist(Trunc.getNode());
4361 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4362 AddToWorklist(Add.getNode());
4363 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4364 }
4365 }
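// Illustrative sketches of the two folds above, assuming i32 values:
//   (udiv x, 16)         -> x >>u 4
//   (udiv x, (shl 8, y)) -> x >>u (y + 3)   // log2(8) added to the shift amount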
4366
4367 // fold (udiv x, c) -> alternate
4368 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4369 if (isConstantOrConstantVector(N1) &&
4370 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4371 if (SDValue Op = BuildUDIV(N))
4372 return Op;
4373
4374 return SDValue();
4375}
4376
4377// handles ISD::SREM and ISD::UREM
4378SDValue DAGCombiner::visitREM(SDNode *N) {
4379 unsigned Opcode = N->getOpcode();
4380 SDValue N0 = N->getOperand(0);
4381 SDValue N1 = N->getOperand(1);
4382 EVT VT = N->getValueType(0);
4383 EVT CCVT = getSetCCResultType(VT);
4384
4385 bool isSigned = (Opcode == ISD::SREM);
4386 SDLoc DL(N);
4387
4388 // fold (rem c1, c2) -> c1%c2
4389 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4390 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4391 return C;
4392
4393 // fold (urem X, -1) -> select(X == -1, 0, x)
4394 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4395 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4396 DAG.getConstant(0, DL, VT), N0);
4397
4398 if (SDValue V = simplifyDivRem(N, DAG))
4399 return V;
4400
4401 if (SDValue NewSel = foldBinOpIntoSelect(N))
4402 return NewSel;
4403
4404 if (isSigned) {
4405 // If we know the sign bits of both operands are zero, strength reduce to a
4406 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4407 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4408 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4409 } else {
4410 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4411 // fold (urem x, pow2) -> (and x, pow2-1)
4412 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4413 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4414 AddToWorklist(Add.getNode());
4415 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4416 }
4417 if (N1.getOpcode() == ISD::SHL &&
4418 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4419 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4420 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4421 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4422 AddToWorklist(Add.getNode());
4423 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4424 }
4425 }
4426
4427 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4428
4429 // If X/C can be simplified by the division-by-constant logic, lower
4430 // X%C to the equivalent of X-X/C*C.
4431 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4432 // speculative DIV must not cause a DIVREM conversion. We guard against this
4433 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4434 // combine will not return a DIVREM. Regardless, checking cheapness here
4435 // makes sense since the simplification results in fatter code.
4436 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4437 SDValue OptimizedDiv =
4438 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4439 if (OptimizedDiv.getNode()) {
4440 // If the equivalent Div node also exists, update its users.
4441 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4442 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4443 { N0, N1 }))
4444 CombineTo(DivNode, OptimizedDiv);
4445 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4446 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4447 AddToWorklist(OptimizedDiv.getNode());
4448 AddToWorklist(Mul.getNode());
4449 return Sub;
4450 }
4451 }
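// Illustrative sketch, assuming a non-zero constant divisor such as 10 and a
// target where integer division is not cheap:
//   X % 10  ->  X - (X / 10) * 10
// where the division is expected to be lowered by the division-by-constant
// logic (BuildSDIV/BuildUDIV) rather than emitted as a real divide.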
4452
4453 // sdiv/udiv, srem/urem -> sdivrem/udivrem
4454 if (SDValue DivRem = useDivRem(N))
4455 return DivRem.getValue(1);
4456
4457 return SDValue();
4458}
4459
4460SDValue DAGCombiner::visitMULHS(SDNode *N) {
4461 SDValue N0 = N->getOperand(0);
4462 SDValue N1 = N->getOperand(1);
4463 EVT VT = N->getValueType(0);
4464 SDLoc DL(N);
4465
4466 if (VT.isVector()) {
4467 // fold (mulhs x, 0) -> 0
4468 // do not return N0/N1, because undef node may exist.
4469 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4470 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4471 return DAG.getConstant(0, DL, VT);
4472 }
4473
4474 // fold (mulhs c1, c2)
4475 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4476 return C;
4477
4478 // fold (mulhs x, 0) -> 0
4479 if (isNullConstant(N1))
4480 return N1;
4481 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4482 if (isOneConstant(N1))
4483 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4484 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4485 getShiftAmountTy(N0.getValueType())));
4486
4487 // fold (mulhs x, undef) -> 0
4488 if (N0.isUndef() || N1.isUndef())
4489 return DAG.getConstant(0, DL, VT);
4490
4491 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4492 // plus a shift.
4493 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4494 !VT.isVector()) {
4495 MVT Simple = VT.getSimpleVT();
4496 unsigned SimpleSize = Simple.getSizeInBits();
4497 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4498 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4499 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4500 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4501 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4502 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4503 DAG.getConstant(SimpleSize, DL,
4504 getShiftAmountTy(N1.getValueType())));
4505 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4506 }
4507 }
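// Illustrative sketch of the widening above, assuming i16 MULHS on a target
// where i32 MUL is legal:
//   (mulhs x, y) -> trunc_i16 ((sext_i32 x * sext_i32 y) >>u 16)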
4508
4509 return SDValue();
4510}
4511
4512SDValue DAGCombiner::visitMULHU(SDNode *N) {
4513 SDValue N0 = N->getOperand(0);
4514 SDValue N1 = N->getOperand(1);
4515 EVT VT = N->getValueType(0);
4516 SDLoc DL(N);
4517
4518 if (VT.isVector()) {
4519 // fold (mulhu x, 0) -> 0
4520 // do not return N0/N1, because undef node may exist.
4521 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4522 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4523 return DAG.getConstant(0, DL, VT);
4524 }
4525
4526 // fold (mulhu c1, c2)
4527 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4528 return C;
4529
4530 // fold (mulhu x, 0) -> 0
4531 if (isNullConstant(N1))
4532 return N1;
4533 // fold (mulhu x, 1) -> 0
4534 if (isOneConstant(N1))
4535 return DAG.getConstant(0, DL, N0.getValueType());
4536 // fold (mulhu x, undef) -> 0
4537 if (N0.isUndef() || N1.isUndef())
4538 return DAG.getConstant(0, DL, VT);
4539
4540 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4541 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4542 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4543 unsigned NumEltBits = VT.getScalarSizeInBits();
4544 SDValue LogBase2 = BuildLogBase2(N1, DL);
4545 SDValue SRLAmt = DAG.getNode(
4546 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4547 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4548 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4549 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4550 }
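// Illustrative sketch, assuming i32: (mulhu x, 16) -> x >>u 28, since the
// high half of x * 2^4 is x >> (32 - 4).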
4551
4552 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4553 // plus a shift.
4554 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4555 !VT.isVector()) {
4556 MVT Simple = VT.getSimpleVT();
4557 unsigned SimpleSize = Simple.getSizeInBits();
4558 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4559 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4560 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4561 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4562 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4563 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4564 DAG.getConstant(SimpleSize, DL,
4565 getShiftAmountTy(N1.getValueType())));
4566 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4567 }
4568 }
4569
4570 return SDValue();
4571}
4572
4573/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4574/// give the opcodes for the two computations that are being performed. Return
4575 /// the new value if a simplification was made.
4576SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4577 unsigned HiOp) {
4578 // If the high half is not needed, just compute the low half.
4579 bool HiExists = N->hasAnyUseOfValue(1);
4580 if (!HiExists && (!LegalOperations ||
4581 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4582 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4583 return CombineTo(N, Res, Res);
4584 }
4585
4586 // If the low half is not needed, just compute the high half.
4587 bool LoExists = N->hasAnyUseOfValue(0);
4588 if (!LoExists && (!LegalOperations ||
4589 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4590 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4591 return CombineTo(N, Res, Res);
4592 }
4593
4594 // If both halves are used, return as it is.
4595 if (LoExists && HiExists)
4596 return SDValue();
4597
4598 // If the two computed results can be simplified separately, separate them.
4599 if (LoExists) {
4600 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4601 AddToWorklist(Lo.getNode());
4602 SDValue LoOpt = combine(Lo.getNode());
4603 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4604 (!LegalOperations ||
4605 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4606 return CombineTo(N, LoOpt, LoOpt);
4607 }
4608
4609 if (HiExists) {
4610 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4611 AddToWorklist(Hi.getNode());
4612 SDValue HiOpt = combine(Hi.getNode());
4613 if (HiOpt.getNode() && HiOpt != Hi &&
4614 (!LegalOperations ||
4615 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4616 return CombineTo(N, HiOpt, HiOpt);
4617 }
4618
4619 return SDValue();
4620}
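// Illustrative sketch: for (lo, hi) = umul_lohi x, y where the hi result is
// never used, the code above emits a plain (mul x, y) (LoOp) and uses it to
// replace the node, so the wide multiply is never materialized.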
4621
4622SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4623 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4624 return Res;
4625
4626 EVT VT = N->getValueType(0);
4627 SDLoc DL(N);
4628
4629 // If the type twice as wide is legal, transform this smul_lohi to a wider
4630 // multiply plus a shift.
4631 if (VT.isSimple() && !VT.isVector()) {
4632 MVT Simple = VT.getSimpleVT();
4633 unsigned SimpleSize = Simple.getSizeInBits();
4634 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4635 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4636 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4637 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4638 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4639 // Compute the high part as N1.
4640 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4641 DAG.getConstant(SimpleSize, DL,
4642 getShiftAmountTy(Lo.getValueType())));
4643 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4644 // Compute the low part as N0.
4645 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4646 return CombineTo(N, Lo, Hi);
4647 }
4648 }
4649
4650 return SDValue();
4651}
4652
4653SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4654 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4655 return Res;
4656
4657 EVT VT = N->getValueType(0);
4658 SDLoc DL(N);
4659
4660 // (umul_lohi N0, 0) -> (0, 0)
4661 if (isNullConstant(N->getOperand(1))) {
4662 SDValue Zero = DAG.getConstant(0, DL, VT);
4663 return CombineTo(N, Zero, Zero);
4664 }
4665
4666 // (umul_lohi N0, 1) -> (N0, 0)
4667 if (isOneConstant(N->getOperand(1))) {
4668 SDValue Zero = DAG.getConstant(0, DL, VT);
4669 return CombineTo(N, N->getOperand(0), Zero);
4670 }
4671
4672 // If the type twice as wide is legal, transform this umul_lohi to a wider
4673 // multiply plus a shift.
4674 if (VT.isSimple() && !VT.isVector()) {
4675 MVT Simple = VT.getSimpleVT();
4676 unsigned SimpleSize = Simple.getSizeInBits();
4677 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4678 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4679 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4680 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4681 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4682 // Compute the high part as N1.
4683 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4684 DAG.getConstant(SimpleSize, DL,
4685 getShiftAmountTy(Lo.getValueType())));
4686 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4687 // Compute the low part as N0.
4688 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4689 return CombineTo(N, Lo, Hi);
4690 }
4691 }
4692
4693 return SDValue();
4694}
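// Illustrative sketch of the widening above, assuming i32 umul_lohi on a
// target where i64 MUL is legal:
//   Wide = zext_i64(x) * zext_i64(y)
//   Lo   = trunc_i32(Wide)
//   Hi   = trunc_i32(Wide >>u 32)
// visitSMUL_LOHI performs the same transform with sign extensions.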
4695
4696SDValue DAGCombiner::visitMULO(SDNode *N) {
4697 SDValue N0 = N->getOperand(0);
4698 SDValue N1 = N->getOperand(1);
4699 EVT VT = N0.getValueType();
4700 bool IsSigned = (ISD::SMULO == N->getOpcode());
4701
4702 EVT CarryVT = N->getValueType(1);
4703 SDLoc DL(N);
4704
4705 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4706 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4707
4708 // fold operation with constant operands.
4709 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4710 // multiple results.
4711 if (N0C && N1C) {
4712 bool Overflow;
4713 APInt Result =
4714 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4715 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4716 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4717 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4718 }
4719
4720 // canonicalize constant to RHS.
4721 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4722 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4723 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4724
4725 // fold (mulo x, 0) -> 0 + no carry out
4726 if (isNullOrNullSplat(N1))
4727 return CombineTo(N, DAG.getConstant(0, DL, VT),
4728 DAG.getConstant(0, DL, CarryVT));
4729
4730 // (mulo x, 2) -> (addo x, x)
4731 if (N1C && N1C->getAPIntValue() == 2)
4732 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4733 N->getVTList(), N0, N0);
4734
4735 if (IsSigned) {
4736 // A 1 bit SMULO overflows if both inputs are 1.
4737 if (VT.getScalarSizeInBits() == 1) {
4738 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4739 return CombineTo(N, And,
4740 DAG.getSetCC(DL, CarryVT, And,
4741 DAG.getConstant(0, DL, VT), ISD::SETNE));
4742 }
4743
4744 // Multiplying n * m significant bits yields a result of n + m significant
4745 // bits. If the total number of significant bits does not exceed the
4746 // result bit width (minus 1), there is no overflow.
4747 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4748 if (SignBits > 1)
4749 SignBits += DAG.ComputeNumSignBits(N1);
4750 if (SignBits > VT.getScalarSizeInBits() + 1)
4751 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4752 DAG.getConstant(0, DL, CarryVT));
4753 } else {
4754 KnownBits N1Known = DAG.computeKnownBits(N1);
4755 KnownBits N0Known = DAG.computeKnownBits(N0);
4756 bool Overflow;
4757 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4758 if (!Overflow)
4759 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4760 DAG.getConstant(0, DL, CarryVT));
4761 }
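// Illustrative sketch of the overflow checks above: for an i32 smulo whose
// operands are both sign-extended from i8, each has 25 sign bits, and
// 25 + 25 = 50 > 32 + 1, so the multiply cannot overflow and is rewritten as
// a plain MUL with a constant-false carry.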
4762
4763 return SDValue();
4764}
4765
4766SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4767 SDValue N0 = N->getOperand(0);
4768 SDValue N1 = N->getOperand(1);
4769 EVT VT = N0.getValueType();
4770 unsigned Opcode = N->getOpcode();
4771
4772 // fold vector ops
4773 if (VT.isVector())
4774 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4775 return FoldedVOp;
4776
4777 // fold operation with constant operands.
4778 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4779 return C;
4780
4781 // canonicalize constant to RHS
4782 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4783 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4784 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4785
4786 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4787 // Only do this if the current op isn't legal and the flipped is.
4788 if (!TLI.isOperationLegal(Opcode, VT) &&
4789 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4790 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4791 unsigned AltOpcode;
4792 switch (Opcode) {
4793 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4794 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4795 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4796 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4797 default: llvm_unreachable("Unknown MINMAX opcode");
4798 }
4799 if (TLI.isOperationLegal(AltOpcode, VT))
4800 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4801 }
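// Illustrative sketch: if both operands are known non-negative (say each is
// (and x, 127)), an SMAX that is not legal can be rewritten as a UMAX that
// is, because signed and unsigned ordering agree for non-negative values.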
4802
4803 // Simplify the operands using demanded-bits information.
4804 if (SimplifyDemandedBits(SDValue(N, 0)))
4805 return SDValue(N, 0);
4806
4807 return SDValue();
4808}
4809
4810/// If this is a bitwise logic instruction and both operands have the same
4811/// opcode, try to sink the other opcode after the logic instruction.
4812SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4813 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4814 EVT VT = N0.getValueType();
4815 unsigned LogicOpcode = N->getOpcode();
4816 unsigned HandOpcode = N0.getOpcode();
4817 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4818 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4819 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4820
4821 // Bail early if none of these transforms apply.
4822 if (N0.getNumOperands() == 0)
4823 return SDValue();
4824
4825 // FIXME: We should check number of uses of the operands to not increase
4826 // the instruction count for all transforms.
4827
4828 // Handle size-changing casts.
4829 SDValue X = N0.getOperand(0);
4830 SDValue Y = N1.getOperand(0);
4831 EVT XVT = X.getValueType();
4832 SDLoc DL(N);
4833 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4834 HandOpcode == ISD::SIGN_EXTEND) {
4835 // If both operands have other uses, this transform would create extra
4836 // instructions without eliminating anything.
4837 if (!N0.hasOneUse() && !N1.hasOneUse())
4838 return SDValue();
4839 // We need matching integer source types.
4840 if (XVT != Y.getValueType())
4841 return SDValue();
4842 // Don't create an illegal op during or after legalization. Don't ever
4843 // create an unsupported vector op.
4844 if ((VT.isVector() || LegalOperations) &&
4845 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4846 return SDValue();
4847 // Avoid infinite looping with PromoteIntBinOp.
4848 // TODO: Should we apply desirable/legal constraints to all opcodes?
4849 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4850 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4851 return SDValue();
4852 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4853 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4854 return DAG.getNode(HandOpcode, DL, VT, Logic);
4855 }
4856
4857 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4858 if (HandOpcode == ISD::TRUNCATE) {
4859 // If both operands have other uses, this transform would create extra
4860 // instructions without eliminating anything.
4861 if (!N0.hasOneUse() && !N1.hasOneUse())
4862 return SDValue();
4863 // We need matching source types.
4864 if (XVT != Y.getValueType())
4865 return SDValue();
4866 // Don't create an illegal op during or after legalization.
4867 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4868 return SDValue();
4869 // Be extra careful sinking truncate. If it's free, there's no benefit in
4870 // widening a binop. Also, don't create a logic op on an illegal type.
4871 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4872 return SDValue();
4873 if (!TLI.isTypeLegal(XVT))
4874 return SDValue();
4875 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4876 return DAG.getNode(HandOpcode, DL, VT, Logic);
4877 }
4878
4879 // For binops SHL/SRL/SRA/AND:
4880 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4881 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4882 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4883 N0.getOperand(1) == N1.getOperand(1)) {
4884 // If either operand has other uses, this transform is not an improvement.
4885 if (!N0.hasOneUse() || !N1.hasOneUse())
4886 return SDValue();
4887 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4888 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4889 }
4890
4891 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4892 if (HandOpcode == ISD::BSWAP) {
4893 // If either operand has other uses, this transform is not an improvement.
4894 if (!N0.hasOneUse() || !N1.hasOneUse())
4895 return SDValue();
4896 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4897 return DAG.getNode(HandOpcode, DL, VT, Logic);
4898 }
4899
4900 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4901 // Only perform this optimization up until type legalization, before
4902 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4903 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4904 // we don't want to undo this promotion.
4905 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4906 // on scalars.
4907 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4908 Level <= AfterLegalizeTypes) {
4909 // Input types must be integer and the same.
4910 if (XVT.isInteger() && XVT == Y.getValueType() &&
4911 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4912 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4913 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4914 return DAG.getNode(HandOpcode, DL, VT, Logic);
4915 }
4916 }
4917
4918 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4919 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4920 // If both shuffles use the same mask, and both shuffle within a single
4921 // vector, then it is worthwhile to move the swizzle after the operation.
4922 // The type-legalizer generates this pattern when loading illegal
4923 // vector types from memory. In many cases this allows additional shuffle
4924 // optimizations.
4925 // There are other cases where moving the shuffle after the xor/and/or
4926 // is profitable even if shuffles don't perform a swizzle.
4927 // If both shuffles use the same mask, and both shuffles have the same first
4928 // or second operand, then it might still be profitable to move the shuffle
4929 // after the xor/and/or operation.
4930 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4931 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4932 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4933 assert(X.getValueType() == Y.getValueType() &&
4934 "Inputs to shuffles are not the same type");
4935
4936 // Check that both shuffles use the same mask. The masks are known to be of
4937 // the same length because the result vector type is the same.
4938 // Check also that shuffles have only one use to avoid introducing extra
4939 // instructions.
4940 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4941 !SVN0->getMask().equals(SVN1->getMask()))
4942 return SDValue();
4943
4944 // Don't try to fold this node if it requires introducing a
4945 // build vector of all zeros that might be illegal at this stage.
4946 SDValue ShOp = N0.getOperand(1);
4947 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4948 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4949
4950 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4951 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4952 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4953 N0.getOperand(0), N1.getOperand(0));
4954 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4955 }
4956
4957 // Don't try to fold this node if it requires introducing a
4958 // build vector of all zeros that might be illegal at this stage.
4959 ShOp = N0.getOperand(0);
4960 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4961 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4962
4963 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4964 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4965 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4966 N1.getOperand(1));
4967 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4968 }
4969 }
4970
4971 return SDValue();
4972}
4973
4974/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4975SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4976 const SDLoc &DL) {
4977 SDValue LL, LR, RL, RR, N0CC, N1CC;
4978 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4979 !isSetCCEquivalent(N1, RL, RR, N1CC))
4980 return SDValue();
4981
4982 assert(N0.getValueType() == N1.getValueType() &&
4983 "Unexpected operand types for bitwise logic op");
4984 assert(LL.getValueType() == LR.getValueType() &&
4985 RL.getValueType() == RR.getValueType() &&
4986 "Unexpected operand types for setcc");
4987
4988 // If we're here post-legalization or the logic op type is not i1, the logic
4989 // op type must match a setcc result type. Also, all folds require new
4990 // operations on the left and right operands, so those types must match.
4991 EVT VT = N0.getValueType();
4992 EVT OpVT = LL.getValueType();
4993 if (LegalOperations || VT.getScalarType() != MVT::i1)
4994 if (VT != getSetCCResultType(OpVT))
4995 return SDValue();
4996 if (OpVT != RL.getValueType())
4997 return SDValue();
4998
4999 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5000 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5001 bool IsInteger = OpVT.isInteger();
5002 if (LR == RR && CC0 == CC1 && IsInteger) {
5003 bool IsZero = isNullOrNullSplat(LR);
5004 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5005
5006 // All bits clear?
5007 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5008 // All sign bits clear?
5009 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5010 // Any bits set?
5011 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5012 // Any sign bits set?
5013 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5014
5015 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5016 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5017 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5018 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5019 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5020 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5021 AddToWorklist(Or.getNode());
5022 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5023 }
5024
5025 // All bits set?
5026 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5027 // All sign bits set?
5028 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5029 // Any bits clear?
5030 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5031 // Any sign bits clear?
5032 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5033
5034 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5035 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5036 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5037 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5038 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5039 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5040 AddToWorklist(And.getNode());
5041 return DAG.getSetCC(DL, VT, And, LR, CC1);
5042 }
5043 }
5044
5045 // TODO: What is the 'or' equivalent of this fold?
5046 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5047 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5048 IsInteger && CC0 == ISD::SETNE &&
5049 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5050 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5051 SDValue One = DAG.getConstant(1, DL, OpVT);
5052 SDValue Two = DAG.getConstant(2, DL, OpVT);
5053 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5054 AddToWorklist(Add.getNode());
5055 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5056 }
5057
5058 // Try more general transforms if the predicates match and the only user of
5059 // the compares is the 'and' or 'or'.
5060 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5061 N0.hasOneUse() && N1.hasOneUse()) {
5062 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5063 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5064 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5065 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5066 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5067 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5068 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5069 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5070 }
5071
5072 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5073 // TODO - support non-uniform vector amounts.
5074 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5075 // Match a shared variable operand and 2 non-opaque constant operands.
5076 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5077 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5078 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5079 const APInt &CMax =
5080 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5081 const APInt &CMin =
5082 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5083 // The difference of the constants must be a single bit.
5084 if ((CMax - CMin).isPowerOf2()) {
5085 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5086 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5087 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5088 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5089 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5090 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5091 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5092 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5093 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5094 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5095 }
5096 }
5097 }
5098 }
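// Illustrative sketch of the single-bit-difference fold above:
//   (and (setne X, 12), (setne X, 8))   // CMax = 12, CMin = 8, CMax - CMin = 4
//   -> (setne (and (sub X, 8), ~4), 0)
// which is false exactly when X is 8 or 12, matching the original pair of
// compares.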
5099
5100 // Canonicalize equivalent operands to LL == RL.
5101 if (LL == RR && LR == RL) {
5102 CC1 = ISD::getSetCCSwappedOperands(CC1);
5103 std::swap(RL, RR);
5104 }
5105
5106 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5107 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5108 if (LL == RL && LR == RR) {
5109 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5110 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5111 if (NewCC != ISD::SETCC_INVALID &&
5112 (!LegalOperations ||
5113 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5114 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5115 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5116 }
5117
5118 return SDValue();
5119}
5120
5121/// This contains all DAGCombine rules which reduce two values combined by
5122/// an And operation to a single value. This makes them reusable in the context
5123/// of visitSELECT(). Rules involving constants are not included as
5124/// visitSELECT() already handles those cases.
5125SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5126 EVT VT = N1.getValueType();
5127 SDLoc DL(N);
5128
5129 // fold (and x, undef) -> 0
5130 if (N0.isUndef() || N1.isUndef())
5131 return DAG.getConstant(0, DL, VT);
5132
5133 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5134 return V;
5135
5136 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5137 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5138 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5139 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5140 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5141 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5142 // immediate for an add, but it is legal if its top c2 bits are set,
5143 // transform the ADD so the immediate doesn't need to be materialized
5144 // in a register.
5145 APInt ADDC = ADDI->getAPIntValue();
5146 APInt SRLC = SRLI->getAPIntValue();
5147 if (ADDC.getMinSignedBits() <= 64 &&
5148 SRLC.ult(VT.getSizeInBits()) &&
5149 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5150 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5151 SRLC.getZExtValue());
5152 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5153 ADDC |= Mask;
5154 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5155 SDLoc DL0(N0);
5156 SDValue NewAdd =
5157 DAG.getNode(ISD::ADD, DL0, VT,
5158 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5159 CombineTo(N0.getNode(), NewAdd);
5160 // Return N so it doesn't get rechecked!
5161 return SDValue(N, 0);
5162 }
5163 }
5164 }
5165 }
5166 }
5167 }
5168
5169 // Reduce bit extract of low half of an integer to the narrower type.
5170 // (and (srl i64:x, K), KMask) ->
5171 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5172 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5173 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5174 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5175 unsigned Size = VT.getSizeInBits();
5176 const APInt &AndMask = CAnd->getAPIntValue();
5177 unsigned ShiftBits = CShift->getZExtValue();
5178
5179 // Bail out, this node will probably disappear anyway.
5180 if (ShiftBits == 0)
5181 return SDValue();
5182
5183 unsigned MaskBits = AndMask.countTrailingOnes();
5184 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5185
5186 if (AndMask.isMask() &&
5187 // Required bits must not span the two halves of the integer and
5188 // must fit in the half size type.
5189 (ShiftBits + MaskBits <= Size / 2) &&
5190 TLI.isNarrowingProfitable(VT, HalfVT) &&
5191 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5192 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5193 TLI.isTruncateFree(VT, HalfVT) &&
5194 TLI.isZExtFree(HalfVT, VT)) {
5195 // The isNarrowingProfitable is to avoid regressions on PPC and
5196 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5197 // on downstream users of this. Those patterns could probably be
5198 // extended to handle extensions mixed in.
5199
5200 SDValue SL(N0);
5201 assert(MaskBits <= Size);
5202
5203 // Extracting the highest bit of the low half.
5204 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5205 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5206 N0.getOperand(0));
5207
5208 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5209 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5210 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5211 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5212 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5213 }
5214 }
5215 }
5216 }
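// Illustrative sketch of the narrowing above, assuming an i64 -> i32 split on
// a target where the i32 ops are desirable:
//   (and (srl i64:x, 8), 0xFF)
//   -> (zero_extend (and (srl (trunc_i32 x), 8), 0xFF))
// ShiftBits + MaskBits = 8 + 8 = 16 fits in the low half, so the extracted
// field never touches the upper 32 bits.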
5217
5218 return SDValue();
5219}
5220
5221bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5222 EVT LoadResultTy, EVT &ExtVT) {
5223 if (!AndC->getAPIntValue().isMask())
5224 return false;
5225
5226 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5227
5228 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5229 EVT LoadedVT = LoadN->getMemoryVT();
5230
5231 if (ExtVT == LoadedVT &&
5232 (!LegalOperations ||
5233 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5234 // ZEXTLOAD will match without needing to change the size of the value being
5235 // loaded.
5236 return true;
5237 }
5238
5239 // Do not change the width of volatile or atomic loads.
5240 if (!LoadN->isSimple())
5241 return false;
5242
5243 // Do not generate loads of non-round integer types since these can
5244 // be expensive (and would be wrong if the type is not byte sized).
5245 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5246 return false;
5247
5248 if (LegalOperations &&
5249 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5250 return false;
5251
5252 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5253 return false;
5254
5255 return true;
5256}
5257
5258bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5259 ISD::LoadExtType ExtType, EVT &MemVT,
5260 unsigned ShAmt) {
5261 if (!LDST)
5262 return false;
5263 // Only allow byte offsets.
5264 if (ShAmt % 8)
5265 return false;
5266
5267 // Do not generate loads of non-round integer types since these can
5268 // be expensive (and would be wrong if the type is not byte sized).
5269 if (!MemVT.isRound())
5270 return false;
5271
5272 // Don't change the width of volatile or atomic loads.
5273 if (!LDST->isSimple())
5274 return false;
5275
5276 EVT LdStMemVT = LDST->getMemoryVT();
5277
5278 // Bail out when changing the scalable property, since we can't be sure that
5279 // we're actually narrowing here.
5280 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5281 return false;
5282
5283 // Verify that we are actually reducing a load width here.
5284 if (LdStMemVT.bitsLT(MemVT))
5285 return false;
5286
5287 // Ensure that this isn't going to produce an unsupported memory access.
5288 if (ShAmt) {
5289 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5290 const unsigned ByteShAmt = ShAmt / 8;
5291 const Align LDSTAlign = LDST->getAlign();
5292 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5293 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5294 LDST->getAddressSpace(), NarrowAlign,
5295 LDST->getMemOperand()->getFlags()))
5296 return false;
5297 }
5298
5299 // It's not possible to generate a constant of extended or untyped type.
5300 EVT PtrType = LDST->getBasePtr().getValueType();
5301 if (PtrType == MVT::Untyped || PtrType.isExtended())
5302 return false;
5303
5304 if (isa<LoadSDNode>(LDST)) {
5305 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5306 // Don't transform one with multiple uses, this would require adding a new
5307 // load.
5308 if (!SDValue(Load, 0).hasOneUse())
5309 return false;
5310
5311 if (LegalOperations &&
5312 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5313 return false;
5314
5315 // For the transform to be legal, the load must produce only two values
5316 // (the value loaded and the chain). Don't transform a pre-increment
5317 // load, for example, which produces an extra value. Otherwise the
5318 // transformation is not equivalent, and the downstream logic to replace
5319 // uses gets things wrong.
5320 if (Load->getNumValues() > 2)
5321 return false;
5322
5323 // If the load that we're shrinking is an extload and we're not just
5324 // discarding the extension we can't simply shrink the load. Bail.
5325 // TODO: It would be possible to merge the extensions in some cases.
5326 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5327 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5328 return false;
5329
5330 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5331 return false;
5332 } else {
5333 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5334 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5335 // Can't write outside the original store
5336 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5337 return false;
5338
5339 if (LegalOperations &&
5340 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5341 return false;
5342 }
5343 return true;
5344}
5345
5346bool DAGCombiner::SearchForAndLoads(SDNode *N,
5347 SmallVectorImpl<LoadSDNode*> &Loads,
5348 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5349 ConstantSDNode *Mask,
5350 SDNode *&NodeToMask) {
5351 // Recursively search for the operands, looking for loads which can be
5352 // narrowed.
5353 for (SDValue Op : N->op_values()) {
5354 if (Op.getValueType().isVector())
5355 return false;
5356
5357 // Some constants may need fixing up later if they are too large.
5358 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5359 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5360 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5361 NodesWithConsts.insert(N);
5362 continue;
5363 }
5364
5365 if (!Op.hasOneUse())
5366 return false;
5367
5368 switch(Op.getOpcode()) {
5369 case ISD::LOAD: {
5370 auto *Load = cast<LoadSDNode>(Op);
5371 EVT ExtVT;
5372 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5373 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5374
5375 // ZEXTLOAD is already small enough.
5376 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5377 ExtVT.bitsGE(Load->getMemoryVT()))
5378 continue;
5379
5380 // Use LE to convert equal sized loads to zext.
5381 if (ExtVT.bitsLE(Load->getMemoryVT()))
5382 Loads.push_back(Load);
5383
5384 continue;
5385 }
5386 return false;
5387 }
5388 case ISD::ZERO_EXTEND:
5389 case ISD::AssertZext: {
5390 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5391 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5392 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5393 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5394 Op.getOperand(0).getValueType();
5395
5396 // We can accept extending nodes if the mask is wider than or equal in
5397 // width to the original type.
5398 if (ExtVT.bitsGE(VT))
5399 continue;
5400 break;
5401 }
5402 case ISD::OR:
5403 case ISD::XOR:
5404 case ISD::AND:
5405 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5406 NodeToMask))
5407 return false;
5408 continue;
5409 }
5410
5411 // Allow one node which will be masked along with any loads found.
5412 if (NodeToMask)
5413 return false;
5414
5415 // Also ensure that the node to be masked only produces one data result.
5416 NodeToMask = Op.getNode();
5417 if (NodeToMask->getNumValues() > 1) {
5418 bool HasValue = false;
5419 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5420 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5421 if (VT != MVT::Glue && VT != MVT::Other) {
5422 if (HasValue) {
5423 NodeToMask = nullptr;
5424 return false;
5425 }
5426 HasValue = true;
5427 }
5428 }
5429 assert(HasValue && "Node to be masked has no data result?");
5430 }
5431 }
5432 return true;
5433}
5434
5435bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5436 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5437 if (!Mask)
5438 return false;
5439
5440 if (!Mask->getAPIntValue().isMask())
5441 return false;
5442
5443 // No need to do anything if the and directly uses a load.
5444 if (isa<LoadSDNode>(N->getOperand(0)))
5445 return false;
5446
5447 SmallVector<LoadSDNode*, 8> Loads;
5448 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5449 SDNode *FixupNode = nullptr;
5450 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5451 if (Loads.size() == 0)
5452 return false;
5453
5454 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5455 SDValue MaskOp = N->getOperand(1);
5456
5457 // If it exists, fixup the single node we allow in the tree that needs
5458 // masking.
5459 if (FixupNode) {
5460 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5461 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5462 FixupNode->getValueType(0),
5463 SDValue(FixupNode, 0), MaskOp);
5464 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5465 if (And.getOpcode() == ISD::AND)
5466 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5467 }
5468
5469 // Narrow any constants that need it.
5470 for (auto *LogicN : NodesWithConsts) {
5471 SDValue Op0 = LogicN->getOperand(0);
5472 SDValue Op1 = LogicN->getOperand(1);
5473
5474 if (isa<ConstantSDNode>(Op0))
5475 std::swap(Op0, Op1);
5476
5477 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5478 Op1, MaskOp);
5479
5480 DAG.UpdateNodeOperands(LogicN, Op0, And);
5481 }
5482
5483 // Create narrow loads.
5484 for (auto *Load : Loads) {
5485 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5486 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5487 SDValue(Load, 0), MaskOp);
5488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5489 if (And.getOpcode() == ISD::AND)
5490 And = SDValue(
5491 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5492 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5493 assert(NewLoad &&
5494 "Shouldn't be masking the load if it can't be narrowed");
5495 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5496 }
5497 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5498 return true;
5499 }
5500 return false;
5501}
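// Illustrative sketch: for (and (or (load i32 p), (load i32 q)), 0xFF), the
// search above finds both loads, the mask is propagated onto each of them,
// ReduceLoadWidth shrinks them to i8 zextloads, and the original AND is then
// bypassed in favour of its first operand.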
5502
5503// Unfold
5504// x & (-1 'logical shift' y)
5505// To
5506// (x 'opposite logical shift' y) 'logical shift' y
5507// if it is better for performance.
5508SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5509 assert(N->getOpcode() == ISD::AND);
5510
5511 SDValue N0 = N->getOperand(0);
5512 SDValue N1 = N->getOperand(1);
5513
5514 // Do we actually prefer shifts over mask?
5515 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5516 return SDValue();
5517
5518 // Try to match (-1 '[outer] logical shift' y)
5519 unsigned OuterShift;
5520 unsigned InnerShift; // The opposite direction to the OuterShift.
5521 SDValue Y; // Shift amount.
5522 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5523 if (!M.hasOneUse())
5524 return false;
5525 OuterShift = M->getOpcode();
5526 if (OuterShift == ISD::SHL)
5527 InnerShift = ISD::SRL;
5528 else if (OuterShift == ISD::SRL)
5529 InnerShift = ISD::SHL;
5530 else
5531 return false;
5532 if (!isAllOnesConstant(M->getOperand(0)))
5533 return false;
5534 Y = M->getOperand(1);
5535 return true;
5536 };
5537
5538 SDValue X;
5539 if (matchMask(N1))
5540 X = N0;
5541 else if (matchMask(N0))
5542 X = N1;
5543 else
5544 return SDValue();
5545
5546 SDLoc DL(N);
5547 EVT VT = N->getValueType(0);
5548
5549 // tmp = x 'opposite logical shift' y
5550 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5551 // ret = tmp 'logical shift' y
5552 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5553
5554 return T1;
5555}
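// Illustrative sketch: x & (-1 << y) becomes (x >>u y) << y, and
// x & (-1 >>u y) becomes (x << y) >>u y; the unwanted bits are shifted out
// and back in instead of being cleared with a materialized mask.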
5556
5557/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5558/// For a target with a bit test, this is expected to become test + set and save
5559/// at least 1 instruction.
5560static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5561 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5562
5563 // This is probably not worthwhile without a supported type.
5564 EVT VT = And->getValueType(0);
5565 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5566 if (!TLI.isTypeLegal(VT))
5567 return SDValue();
5568
5569 // Look through an optional extension and find a 'not'.
5570 // TODO: Should we favor test+set even without the 'not' op?
5571 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5572 if (Not.getOpcode() == ISD::ANY_EXTEND)
5573 Not = Not.getOperand(0);
5574 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5575 return SDValue();
5576
5577 // Look through an optional truncation. The source operand may not be the same
5578 // type as the original 'and', but that is ok because we are masking off
5579 // everything but the low bit.
5580 SDValue Srl = Not.getOperand(0);
5581 if (Srl.getOpcode() == ISD::TRUNCATE)
5582 Srl = Srl.getOperand(0);
5583
5584 // Match a shift-right by constant.
5585 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5586 !isa<ConstantSDNode>(Srl.getOperand(1)))
5587 return SDValue();
5588
5589 // We might have looked through casts that make this transform invalid.
5590 // TODO: If the source type is wider than the result type, do the mask and
5591 // compare in the source type.
5592 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5593 unsigned VTBitWidth = VT.getSizeInBits();
5594 if (ShiftAmt.uge(VTBitWidth))
5595 return SDValue();
5596
5597 // Turn this into a bit-test pattern using mask op + setcc:
5598 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5599 SDLoc DL(And);
5600 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5601 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5602 SDValue Mask = DAG.getConstant(
5603 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5604 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5605 SDValue Zero = DAG.getConstant(0, DL, VT);
5606 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5607 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5608}
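// Illustrative sketch, assuming i32: (and (not (srl X, 3)), 1) tests whether
// bit 3 of X is clear, so it becomes the equivalent of zext((X & 8) == 0)
// using the mask and setcc built above.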
5609
5610SDValue DAGCombiner::visitAND(SDNode *N) {
5611 SDValue N0 = N->getOperand(0);
5612 SDValue N1 = N->getOperand(1);
5613 EVT VT = N1.getValueType();
5614
5615 // x & x --> x
5616 if (N0 == N1)
5617 return N0;
5618
5619 // fold vector ops
5620 if (VT.isVector()) {
5621 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5622 return FoldedVOp;
5623
5624 // fold (and x, 0) -> 0, vector edition
5625 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5626 // do not return N0, because undef node may exist in N0
5627 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5628 SDLoc(N), N0.getValueType());
5629 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5630 // do not return N1, because undef node may exist in N1
5631 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5632 SDLoc(N), N1.getValueType());
5633
5634 // fold (and x, -1) -> x, vector edition
5635 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5636 return N1;
5637 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5638 return N0;
5639
5640 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5641 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5642 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5643 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5644 N0.hasOneUse() && N1.hasOneUse()) {
5645 EVT LoadVT = MLoad->getMemoryVT();
5646 EVT ExtVT = VT;
5647 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5648 // For this AND to be a zero extension of the masked load the elements
5649 // of the BuildVec must mask the bottom bits of the extended element
5650 // type
5651 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5652 uint64_t ElementSize =
5653 LoadVT.getVectorElementType().getScalarSizeInBits();
5654 if (Splat->getAPIntValue().isMask(ElementSize)) {
5655 return DAG.getMaskedLoad(
5656 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5657 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5658 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5659 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5660 }
5661 }
5662 }
5663 }
5664 }
5665
5666 // fold (and c1, c2) -> c1&c2
5667 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5668 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5669 return C;
5670
5671 // canonicalize constant to RHS
5672 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5673 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5674 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5675
5676 // fold (and x, -1) -> x
5677 if (isAllOnesConstant(N1))
5678 return N0;
5679
5680 // if (and x, c) is known to be zero, return 0
5681 unsigned BitWidth = VT.getScalarSizeInBits();
5682 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5683 APInt::getAllOnesValue(BitWidth)))
5684 return DAG.getConstant(0, SDLoc(N), VT);
5685
5686 if (SDValue NewSel = foldBinOpIntoSelect(N))
5687 return NewSel;
5688
5689 // reassociate and
5690 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5691 return RAND;
5692
5693 // Try to convert a constant mask AND into a shuffle clear mask.
5694 if (VT.isVector())
5695 if (SDValue Shuffle = XformToShuffleWithZero(N))
5696 return Shuffle;
5697
5698 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5699 return Combined;
5700
5701 // fold (and (or x, C), D) -> D if (C & D) == D
5702 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5703 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5704 };
5705 if (N0.getOpcode() == ISD::OR &&
5706 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5707 return N1;
5708 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5709 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5710 SDValue N0Op0 = N0.getOperand(0);
5711 APInt Mask = ~N1C->getAPIntValue();
5712 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5713 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5714 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5715 N0.getValueType(), N0Op0);
5716
5717 // Replace uses of the AND with uses of the Zero extend node.
5718 CombineTo(N, Zext);
5719
5720 // We actually want to replace all uses of the any_extend with the
5721 // zero_extend, to avoid duplicating things. This will later cause this
5722 // AND to be folded.
5723 CombineTo(N0.getNode(), Zext);
5724 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5725 }
5726 }
5727
5728 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5729 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5730 // already be zero by virtue of the width of the base type of the load.
5731 //
5732 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5733 // more cases.
5734 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5735 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5736 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5737 N0.getOperand(0).getResNo() == 0) ||
5738 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5739 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5740 N0 : N0.getOperand(0) );
5741
5742 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5743 // This can be a pure constant or a vector splat, in which case we treat the
5744 // vector as a scalar and use the splat value.
5745 APInt Constant = APInt::getNullValue(1);
5746 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5747 Constant = C->getAPIntValue();
5748 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5749 APInt SplatValue, SplatUndef;
5750 unsigned SplatBitSize;
5751 bool HasAnyUndefs;
5752 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5753 SplatBitSize, HasAnyUndefs);
5754 if (IsSplat) {
5755 // Undef bits can contribute to a possible optimisation if set, so
5756 // set them.
5757 SplatValue |= SplatUndef;
5758
5759 // The splat value may be something like "0x00FFFFFF", which means 0 for
5760 // the first vector value and FF for the rest, repeating. We need a mask
5761 // that will apply equally to all members of the vector, so AND all the
5762 // lanes of the constant together.
5763 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5764
5765 // If the splat value has been compressed to a bitlength lower
5766 // than the size of the vector lane, we need to re-expand it to
5767 // the lane size.
5768 if (EltBitWidth > SplatBitSize)
5769 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5770 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5771 SplatValue |= SplatValue.shl(SplatBitSize);
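// Editorial example (illustrative, not part of the analyzed source): a splat
// reported as SplatValue 0xAB with SplatBitSize 8 under an i32 element type
// widens here to 0xABABABAB before the lanes are AND'ed together below.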
5772
5773 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5774 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5775 if ((SplatBitSize % EltBitWidth) == 0) {
5776 Constant = APInt::getAllOnesValue(EltBitWidth);
5777 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5778 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5779 }
5780 }
5781 }
5782
5783 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5784 // actually legal and isn't going to get expanded, else this is a false
5785 // optimisation.
5786 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5787 Load->getValueType(0),
5788 Load->getMemoryVT());
5789
5790 // Resize the constant to the same size as the original memory access before
5791 // extension. If it is still the AllOnesValue then this AND is completely
5792 // unneeded.
5793 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5794
5795 bool B;
5796 switch (Load->getExtensionType()) {
5797 default: B = false; break;
5798 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5799 case ISD::ZEXTLOAD:
5800 case ISD::NON_EXTLOAD: B = true; break;
5801 }
5802
5803 if (B && Constant.isAllOnesValue()) {
5804 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5805 // preserve semantics once we get rid of the AND.
5806 SDValue NewLoad(Load, 0);
5807
5808 // Fold the AND away. NewLoad may get replaced immediately.
5809 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5810
5811 if (Load->getExtensionType() == ISD::EXTLOAD) {
5812 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5813 Load->getValueType(0), SDLoc(Load),
5814 Load->getChain(), Load->getBasePtr(),
5815 Load->getOffset(), Load->getMemoryVT(),
5816 Load->getMemOperand());
5817 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5818 if (Load->getNumValues() == 3) {
5819 // PRE/POST_INC loads have 3 values.
5820 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5821 NewLoad.getValue(2) };
5822 CombineTo(Load, To, 3, true);
5823 } else {
5824 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5825 }
5826 }
5827
5828 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5829 }
5830 }
5831
5832 // fold (and (masked_gather x)) -> (zext_masked_gather x)
5833 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5834 EVT MemVT = GN0->getMemoryVT();
5835 EVT ScalarVT = MemVT.getScalarType();
5836
5837 if (SDValue(GN0, 0).hasOneUse() &&
5838 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5839 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
5840 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
5841 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
5842
5843 SDValue ZExtLoad = DAG.getMaskedGather(
5844 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5845 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5846
5847 CombineTo(N, ZExtLoad);
5848 AddToWorklist(ZExtLoad.getNode());
5849 // Avoid recheck of N.
5850 return SDValue(N, 0);
5851 }
5852 }
5853
5854 // fold (and (load x), 255) -> (zextload x, i8)
5855 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5856 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5857 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5858 (N0.getOpcode() == ISD::ANY_EXTEND &&
5859 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5860 if (SDValue Res = ReduceLoadWidth(N)) {
5861 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5862 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5863 AddToWorklist(N);
5864 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5865 return SDValue(N, 0);
5866 }
5867 }
5868
5869 if (LegalTypes) {
5870 // Attempt to propagate the AND back up to the leaves which, if they're
5871 // loads, can be combined to narrow loads and the AND node can be removed.
5872 // Perform after legalization so that extend nodes will already be
5873 // combined into the loads.
5874 if (BackwardsPropagateMask(N))
5875 return SDValue(N, 0);
5876 }
5877
5878 if (SDValue Combined = visitANDLike(N0, N1, N))
5879 return Combined;
5880
5881 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5882 if (N0.getOpcode() == N1.getOpcode())
5883 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5884 return V;
5885
5886 // Masking the negated extension of a boolean is just the zero-extended
5887 // boolean:
5888 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5889 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5890 //
5891 // Note: the SimplifyDemandedBits fold below can make an information-losing
5892 // transform, and then we have no way to find this better fold.
5893 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5894 if (isNullOrNullSplat(N0.getOperand(0))) {
5895 SDValue SubRHS = N0.getOperand(1);
5896 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5897 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5898 return SubRHS;
5899 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5900 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5901 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5902 }
5903 }
5904
5905 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5906 // fold (and (sra)) -> (and (srl)) when possible.
5907 if (SimplifyDemandedBits(SDValue(N, 0)))
5908 return SDValue(N, 0);
5909
5910 // fold (zext_inreg (extload x)) -> (zextload x)
5911 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5912 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5913 (ISD::isEXTLoad(N0.getNode()) ||
5914 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5915 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5916 EVT MemVT = LN0->getMemoryVT();
5917 // If we zero all the possible extended bits, then we can turn this into
5918 // a zextload if we are running before legalize or the operation is legal.
5919 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5920 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5921 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5922 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5923 ((!LegalOperations && LN0->isSimple()) ||
5924 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5925 SDValue ExtLoad =
5926 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5927 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5928 AddToWorklist(N);
5929 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5930 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5931 }
5932 }
5933
5934 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5935 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5936 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5937 N0.getOperand(1), false))
5938 return BSwap;
5939 }
5940
5941 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5942 return Shifts;
5943
5944 if (TLI.hasBitTest(N0, N1))
5945 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5946 return V;
5947
5948 // Recognize the following pattern:
5949 //
5950 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5951 //
5952 // where bitmask is a mask that clears the upper bits of AndVT. The
5953 // number of bits in bitmask must be a power of two.
5954 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5955 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5956 return false;
5957
5958 auto *C = dyn_cast<ConstantSDNode>(RHS);
5959 if (!C)
5960 return false;
5961
5962 if (!C->getAPIntValue().isMask(
5963 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5964 return false;
5965
5966 return true;
5967 };
5968
5969 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5970 if (IsAndZeroExtMask(N0, N1))
5971 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5972
5973 return SDValue();
5974}
5975
5976/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
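///
/// Editorial sketch (illustrative, i32 assumed): the matched input looks like
///   (or (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8))
/// and is rewritten to (srl (bswap a), 16); bswap moves the low halfword's
/// bytes to the top, and the shift brings the swapped pair back down.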
5977SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5978 bool DemandHighBits) {
5979 if (!LegalOperations)
5980 return SDValue();
5981
5982 EVT VT = N->getValueType(0);
5983 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5984 return SDValue();
5985 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5986 return SDValue();
5987
5988 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5989 bool LookPassAnd0 = false;
5990 bool LookPassAnd1 = false;
5991 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5992 std::swap(N0, N1);
5993 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5994 std::swap(N0, N1);
5995 if (N0.getOpcode() == ISD::AND) {
5996 if (!N0.getNode()->hasOneUse())
5997 return SDValue();
5998 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5999 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6000 // This is needed for X86.
6001 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6002 N01C->getZExtValue() != 0xFFFF))
6003 return SDValue();
6004 N0 = N0.getOperand(0);
6005 LookPassAnd0 = true;
6006 }
6007
6008 if (N1.getOpcode() == ISD::AND) {
6009 if (!N1.getNode()->hasOneUse())
6010 return SDValue();
6011 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6012 if (!N11C || N11C->getZExtValue() != 0xFF)
6013 return SDValue();
6014 N1 = N1.getOperand(0);
6015 LookPassAnd1 = true;
6016 }
6017
6018 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6019 std::swap(N0, N1);
6020 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6021 return SDValue();
6022 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6023 return SDValue();
6024
6025 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6026 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6027 if (!N01C || !N11C)
6028 return SDValue();
6029 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6030 return SDValue();
6031
6032 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6033 SDValue N00 = N0->getOperand(0);
6034 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6035 if (!N00.getNode()->hasOneUse())
6036 return SDValue();
6037 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6038 if (!N001C || N001C->getZExtValue() != 0xFF)
6039 return SDValue();
6040 N00 = N00.getOperand(0);
6041 LookPassAnd0 = true;
6042 }
6043
6044 SDValue N10 = N1->getOperand(0);
6045 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6046 if (!N10.getNode()->hasOneUse())
6047 return SDValue();
6048 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6049 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6050 // for X86.
6051 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6052 N101C->getZExtValue() != 0xFFFF))
6053 return SDValue();
6054 N10 = N10.getOperand(0);
6055 LookPassAnd1 = true;
6056 }
6057
6058 if (N00 != N10)
6059 return SDValue();
6060
6061 // Make sure everything beyond the low halfword gets set to zero since the SRL
6062 // 16 will clear the top bits.
6063 unsigned OpSizeInBits = VT.getSizeInBits();
6064 if (DemandHighBits && OpSizeInBits > 16) {
6065 // If the left-shift isn't masked out then the only way this is a bswap is
6066 // if all bits beyond the low 8 are 0. In that case the entire pattern
6067 // reduces to a left shift anyway: leave it for other parts of the combiner.
6068 if (!LookPassAnd0)
6069 return SDValue();
6070
6071 // However, if the right shift isn't masked out then it might be because
6072 // it's not needed. See if we can spot that too.
6073 if (!LookPassAnd1 &&
6074 !DAG.MaskedValueIsZero(
6075 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6076 return SDValue();
6077 }
6078
6079 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6080 if (OpSizeInBits > 16) {
6081 SDLoc DL(N);
6082 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6083 DAG.getConstant(OpSizeInBits - 16, DL,
6084 getShiftAmountTy(VT)));
6085 }
6086 return Res;
6087}
6088
6089/// Return true if the specified node is an element that makes up a 32-bit
6090/// packed halfword byteswap.
6091/// ((x & 0x000000ff) << 8) |
6092/// ((x & 0x0000ff00) >> 8) |
6093/// ((x & 0x00ff0000) << 8) |
6094/// ((x & 0xff000000) >> 8)
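///
/// Editorial note (illustrative): each matched term contributes one byte;
/// e.g. the ((x & 0x0000ff00) >> 8) term records x's node at Parts[1],
/// since the 0xff00 mask selects byte offset 1.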
6095static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6096 if (!N.getNode()->hasOneUse())
6097 return false;
6098
6099 unsigned Opc = N.getOpcode();
6100 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6101 return false;
6102
6103 SDValue N0 = N.getOperand(0);
6104 unsigned Opc0 = N0.getOpcode();
6105 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6106 return false;
6107
6108 ConstantSDNode *N1C = nullptr;
6109 // SHL or SRL: look upstream for AND mask operand
6110 if (Opc == ISD::AND)
6111 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6112 else if (Opc0 == ISD::AND)
6113 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6114 if (!N1C)
6115 return false;
6116
6117 unsigned MaskByteOffset;
6118 switch (N1C->getZExtValue()) {
6119 default:
6120 return false;
6121 case 0xFF: MaskByteOffset = 0; break;
6122 case 0xFF00: MaskByteOffset = 1; break;
6123 case 0xFFFF:
6124 // In case demanded bits didn't clear the bits that will be shifted out.
6125 // This is needed for X86.
6126 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6127 MaskByteOffset = 1;
6128 break;
6129 }
6130 return false;
6131 case 0xFF0000: MaskByteOffset = 2; break;
6132 case 0xFF000000: MaskByteOffset = 3; break;
6133 }
6134
6135 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6136 if (Opc == ISD::AND) {
6137 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6138 // (x >> 8) & 0xff
6139 // (x >> 8) & 0xff0000
6140 if (Opc0 != ISD::SRL)
6141 return false;
6142 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6143 if (!C || C->getZExtValue() != 8)
6144 return false;
6145 } else {
6146 // (x << 8) & 0xff00
6147 // (x << 8) & 0xff000000
6148 if (Opc0 != ISD::SHL)
6149 return false;
6150 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6151 if (!C || C->getZExtValue() != 8)
6152 return false;
6153 }
6154 } else if (Opc == ISD::SHL) {
6155 // (x & 0xff) << 8
6156 // (x & 0xff0000) << 8
6157 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6158 return false;
6159 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6160 if (!C || C->getZExtValue() != 8)
6161 return false;
6162 } else { // Opc == ISD::SRL
6163 // (x & 0xff00) >> 8
6164 // (x & 0xff000000) >> 8
6165 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6166 return false;
6167 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6168 if (!C || C->getZExtValue() != 8)
6169 return false;
6170 }
6171
6172 if (Parts[MaskByteOffset])
6173 return false;
6174
6175 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6176 return true;
6177}
6178
6179// Match 2 elements of a packed halfword bswap.
6180static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6181 if (N.getOpcode() == ISD::OR)
6182 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6183 isBSwapHWordElement(N.getOperand(1), Parts);
6184
6185 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6186 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6187 if (!C || C->getAPIntValue() != 16)
6188 return false;
6189 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6190 return true;
6191 }
6192
6193 return false;
6194}
6195
6196// Match this pattern:
6197// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6198// And rewrite this to:
6199// (rotr (bswap A), 16)
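//
// Editorial byte-level check (illustrative, A = [b3 b2 b1 b0] with b3 most
// significant): (shl A, 8) & 0xff00ff00 gives [b2 0 b0 0] and
// (srl A, 8) & 0x00ff00ff gives [0 b3 0 b1], so the OR is [b2 b3 b0 b1],
// which is exactly (rotr (bswap A), 16) since bswap A is [b0 b1 b2 b3].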
6200static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6201 SelectionDAG &DAG, SDNode *N, SDValue N0,
6202 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6203 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6204 "MatchBSwapHWordOrAndAnd: expecting i32");
6205 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6206 return SDValue();
6207 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6208 return SDValue();
6209 // TODO: this is too restrictive; lifting this restriction requires more tests
6210 if (!N0->hasOneUse() || !N1->hasOneUse())
6211 return SDValue();
6212 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6213 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6214 if (!Mask0 || !Mask1)
6215 return SDValue();
6216 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6217 Mask1->getAPIntValue() != 0x00ff00ff)
6218 return SDValue();
6219 SDValue Shift0 = N0.getOperand(0);
6220 SDValue Shift1 = N1.getOperand(0);
6221 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6222 return SDValue();
6223 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6224 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6225 if (!ShiftAmt0 || !ShiftAmt1)
6226 return SDValue();
6227 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6228 return SDValue();
6229 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6230 return SDValue();
6231
6232 SDLoc DL(N);
6233 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6234 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6235 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6236}
6237
6238/// Match a 32-bit packed halfword bswap. That is
6239/// ((x & 0x000000ff) << 8) |
6240/// ((x & 0x0000ff00) >> 8) |
6241/// ((x & 0x00ff0000) << 8) |
6242/// ((x & 0xff000000) >> 8)
6243/// => (rotl (bswap x), 16)
6244SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6245 if (!LegalOperations)
6246 return SDValue();
6247
6248 EVT VT = N->getValueType(0);
6249 if (VT != MVT::i32)
6250 return SDValue();
6251 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6252 return SDValue();
6253
6254 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6255 getShiftAmountTy(VT)))
6256 return BSwap;
6257
6258 // Try again with commuted operands.
6259 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6260 getShiftAmountTy(VT)))
6261 return BSwap;
6262
6263
6264 // Look for either
6265 // (or (bswaphpair), (bswaphpair))
6266 // (or (or (bswaphpair), (and)), (and))
6267 // (or (or (and), (bswaphpair)), (and))
6268 SDNode *Parts[4] = {};
6269
6270 if (isBSwapHWordPair(N0, Parts)) {
6271 // (or (or (and), (and)), (or (and), (and)))
6272 if (!isBSwapHWordPair(N1, Parts))
6273 return SDValue();
6274 } else if (N0.getOpcode() == ISD::OR) {
6275 // (or (or (or (and), (and)), (and)), (and))
6276 if (!isBSwapHWordElement(N1, Parts))
6277 return SDValue();
6278 SDValue N00 = N0.getOperand(0);
6279 SDValue N01 = N0.getOperand(1);
6280 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6281 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6282 return SDValue();
6283 } else
6284 return SDValue();
6285
6286 // Make sure the parts are all coming from the same node.
6287 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6288 return SDValue();
6289
6290 SDLoc DL(N);
6291 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6292 SDValue(Parts[0], 0));
6293
6294 // Result of the bswap should be rotated by 16. If it's not legal, then
6295 // do (x << 16) | (x >> 16).
6296 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6297 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6298 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6299 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6300 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6301 return DAG.getNode(ISD::OR, DL, VT,
6302 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6303 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6304}
6305
6306/// This contains all DAGCombine rules which reduce two values combined by
6307/// an Or operation to a single value \see visitANDLike().
6308SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6309 EVT VT = N1.getValueType();
6310 SDLoc DL(N);
6311
6312 // fold (or x, undef) -> -1
6313 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6314 return DAG.getAllOnesConstant(DL, VT);
6315
6316 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6317 return V;
6318
6319 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6320 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6321 // Don't increase # computations.
6322 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6323 // We can only do this xform if we know that bits from X that are set in C2
6324 // but not in C1 are already zero. Likewise for Y.
6325 if (const ConstantSDNode *N0O1C =
6326 getAsNonOpaqueConstant(N0.getOperand(1))) {
6327 if (const ConstantSDNode *N1O1C =
6328 getAsNonOpaqueConstant(N1.getOperand(1))) {
6329 // We can only do this xform if we know that bits from X that are set in
6330 // C2 but not in C1 are already zero. Likewise for Y.
6331 const APInt &LHSMask = N0O1C->getAPIntValue();
6332 const APInt &RHSMask = N1O1C->getAPIntValue();
6333
6334 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6335 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6336 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6337 N0.getOperand(0), N1.getOperand(0));
6338 return DAG.getNode(ISD::AND, DL, VT, X,
6339 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6340 }
6341 }
6342 }
6343 }
6344
6345 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6346 if (N0.getOpcode() == ISD::AND &&
6347 N1.getOpcode() == ISD::AND &&
6348 N0.getOperand(0) == N1.getOperand(0) &&
6349 // Don't increase # computations.
6350 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6351 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6352 N0.getOperand(1), N1.getOperand(1));
6353 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6354 }
6355
6356 return SDValue();
6357}
6358
6359/// OR combines for which the commuted variant will be tried as well.
6360static SDValue visitORCommutative(
6361 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6362 EVT VT = N0.getValueType();
6363 if (N0.getOpcode() == ISD::AND) {
6364 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6365 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6366 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6367
6368 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6369 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6370 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6371 }
6372
6373 return SDValue();
6374}
6375
6376SDValue DAGCombiner::visitOR(SDNode *N) {
6377 SDValue N0 = N->getOperand(0);
6378 SDValue N1 = N->getOperand(1);
6379 EVT VT = N1.getValueType();
6380
6381 // x | x --> x
6382 if (N0 == N1)
6383 return N0;
6384
6385 // fold vector ops
6386 if (VT.isVector()) {
6387 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6388 return FoldedVOp;
6389
6390 // fold (or x, 0) -> x, vector edition
6391 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6392 return N1;
6393 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6394 return N0;
6395
6396 // fold (or x, -1) -> -1, vector edition
6397 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6398 // do not return N0, because undef node may exist in N0
6399 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6400 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6401 // do not return N1, because undef node may exist in N1
6402 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6403
6404 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6405 // Do this only if the resulting shuffle is legal.
6406 if (isa<ShuffleVectorSDNode>(N0) &&
6407 isa<ShuffleVectorSDNode>(N1) &&
6408 // Avoid folding a node with illegal type.
6409 TLI.isTypeLegal(VT)) {
6410 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6411 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6412 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6413 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6414 // Ensure both shuffles have a zero input.
6415 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6416 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6417 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6418 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6419 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6420 bool CanFold = true;
6421 int NumElts = VT.getVectorNumElements();
6422 SmallVector<int, 4> Mask(NumElts);
6423
6424 for (int i = 0; i != NumElts; ++i) {
6425 int M0 = SV0->getMaskElt(i);
6426 int M1 = SV1->getMaskElt(i);
6427
6428 // Determine if either index is pointing to a zero vector.
6429 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6430 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6431
6432 // If one element is zero and the other side is undef, keep undef.
6433 // This also handles the case that both are undef.
6434 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6435 Mask[i] = -1;
6436 continue;
6437 }
6438
6439 // Make sure only one of the elements is zero.
6440 if (M0Zero == M1Zero) {
6441 CanFold = false;
6442 break;
6443 }
6444
6445 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6446
6447 // We have a zero and non-zero element. If the non-zero came from
6448 // SV0 make the index a LHS index. If it came from SV1, make it
6449 // a RHS index. We need to mod by NumElts because we don't care
6450 // which operand it came from in the original shuffles.
6451 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6452 }
6453
6454 if (CanFold) {
6455 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6456 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6457
6458 SDValue LegalShuffle =
6459 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6460 Mask, DAG);
6461 if (LegalShuffle)
6462 return LegalShuffle;
6463 }
6464 }
6465 }
6466 }
6467
6468 // fold (or c1, c2) -> c1|c2
6469 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6470 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6471 return C;
6472
6473 // canonicalize constant to RHS
6474 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6475 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6476 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6477
6478 // fold (or x, 0) -> x
6479 if (isNullConstant(N1))
6480 return N0;
6481
6482 // fold (or x, -1) -> -1
6483 if (isAllOnesConstant(N1))
6484 return N1;
6485
6486 if (SDValue NewSel = foldBinOpIntoSelect(N))
6487 return NewSel;
6488
6489 // fold (or x, c) -> c iff (x & ~c) == 0
6490 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6491 return N1;
6492
6493 if (SDValue Combined = visitORLike(N0, N1, N))
6494 return Combined;
6495
6496 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6497 return Combined;
6498
6499 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6500 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6501 return BSwap;
6502 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6503 return BSwap;
6504
6505 // reassociate or
6506 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6507 return ROR;
6508
6509 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6510 // iff (c1 & c2) != 0 or c1/c2 are undef.
6511 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6512 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6513 };
6514 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6515 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6516 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6517 {N1, N0.getOperand(1)})) {
6518 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6519 AddToWorklist(IOR.getNode());
6520 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6521 }
6522 }
6523
6524 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6525 return Combined;
6526 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6527 return Combined;
6528
6529 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6530 if (N0.getOpcode() == N1.getOpcode())
6531 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6532 return V;
6533
6534 // See if this is some rotate idiom.
6535 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6536 return Rot;
6537
6538 if (SDValue Load = MatchLoadCombine(N))
6539 return Load;
6540
6541 // Simplify the operands using demanded-bits information.
6542 if (SimplifyDemandedBits(SDValue(N, 0)))
6543 return SDValue(N, 0);
6544
6545 // If OR can be rewritten into ADD, try combines based on ADD.
6546 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6547 DAG.haveNoCommonBitsSet(N0, N1))
6548 if (SDValue Combined = visitADDLike(N))
6549 return Combined;
6550
6551 return SDValue();
6552}
6553
6554static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6555 if (Op.getOpcode() == ISD::AND &&
6556 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6557 Mask = Op.getOperand(1);
6558 return Op.getOperand(0);
6559 }
6560 return Op;
6561}
6562
6563/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6564static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6565 SDValue &Mask) {
6566 Op = stripConstantMask(DAG, Op, Mask);
6567 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6568 Shift = Op;
6569 return true;
6570 }
6571 return false;
6572}
6573
6574/// Helper function for visitOR to extract the needed side of a rotate idiom
6575/// from a shl/srl/mul/udiv. This is meant to handle cases where
6576/// InstCombine merged some outside op with one of the shifts from
6577/// the rotate pattern.
6578/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6579/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6580/// patterns:
6581///
6582/// (or (add v v) (shrl v bitwidth-1)):
6583/// expands (add v v) -> (shl v 1)
6584///
6585/// (or (mul v c0) (shrl (mul v c1) c2)):
6586/// expands (mul v c0) -> (shl (mul v c1) c3)
6587///
6588/// (or (udiv v c0) (shl (udiv v c1) c2)):
6589/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6590///
6591/// (or (shl v c0) (shrl (shl v c1) c2)):
6592/// expands (shl v c0) -> (shl (shl v c1) c3)
6593///
6594/// (or (shrl v c0) (shl (shrl v c1) c2)):
6595/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6596///
6597/// Such that in all cases, c3+c2==bitwidth(op v c1).
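///
/// Editorial worked example (illustrative constants, i32): in
///   (or (mul v 48) (srl (mul v 3) 28))
/// the existing srl supplies c2 == 28, so the needed amount is
/// c3 == 32 - 28 == 4; since 48 == 3 * (1 << 4), (mul v 48) expands to
/// (shl (mul v 3) 4), and the OR can then be matched as a rotate of
/// (mul v 3) by 4.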
6598static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6599 SDValue ExtractFrom, SDValue &Mask,
6600 const SDLoc &DL) {
6601 assert(OppShift && ExtractFrom && "Empty SDValue");
6602 assert(
6603 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6604 "Existing shift must be valid as a rotate half");
6605
6606 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6607
6608 // Value and Type of the shift.
6609 SDValue OppShiftLHS = OppShift.getOperand(0);
6610 EVT ShiftedVT = OppShiftLHS.getValueType();
6611
6612 // Amount of the existing shift.
6613 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6614
6615 // (add v v) -> (shl v 1)
6616 // TODO: Should this be a general DAG canonicalization?
6617 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6618 ExtractFrom.getOpcode() == ISD::ADD &&
6619 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6620 ExtractFrom.getOperand(0) == OppShiftLHS &&
6621 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6622 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6623 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6624
6625 // Preconditions:
6626 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6627 //
6628 // Find opcode of the needed shift to be extracted from (op0 v c0).
6629 unsigned Opcode = ISD::DELETED_NODE;
6630 bool IsMulOrDiv = false;
6631 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6632 // opcode or its arithmetic (mul or udiv) variant.
6633 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6634 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6635 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6636 return false;
6637 Opcode = NeededShift;
6638 return true;
6639 };
6640 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6641 // that the needed shift can be extracted from.
6642 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6643 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6644 return SDValue();
6645
6646 // op0 must be the same opcode on both sides, have the same LHS argument,
6647 // and produce the same value type.
6648 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6649 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6650 ShiftedVT != ExtractFrom.getValueType())
6651 return SDValue();
6652
6653 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6654 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6655 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6656 ConstantSDNode *ExtractFromCst =
6657 isConstOrConstSplat(ExtractFrom.getOperand(1));
6658 // TODO: We should be able to handle non-uniform constant vectors for these values
6659 // Check that we have constant values.
6660 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6661 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6662 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6663 return SDValue();
6664
6665 // Compute the shift amount we need to extract to complete the rotate.
6666 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6667 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6668 return SDValue();
6669 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6670 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6671 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6672 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6673 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6674
6675 // Now try extract the needed shift from the ExtractFrom op and see if the
6676 // result matches up with the existing shift's LHS op.
6677 if (IsMulOrDiv) {
6678 // Op to extract from is a mul or udiv by a constant.
6679 // Check:
6680 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6681 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6682 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6683 NeededShiftAmt.getZExtValue());
6684 APInt ResultAmt;
6685 APInt Rem;
6686 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6687 if (Rem != 0 || ResultAmt != OppLHSAmt)
6688 return SDValue();
6689 } else {
6690 // Op to extract from is a shift by a constant.
6691 // Check:
6692 // c2 - (bitwidth(op0 v c0) - c1) == c0
6693 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6694 ExtractFromAmt.getBitWidth()))
6695 return SDValue();
6696 }
6697
6698 // Return the expanded shift op that should allow a rotate to be formed.
6699 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6700 EVT ResVT = ExtractFrom.getValueType();
6701 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6702 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6703}
6704
6705// Return true if we can prove that, whenever Neg and Pos are both in the
6706// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6707// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6708//
6709// (or (shift1 X, Neg), (shift2 X, Pos))
6710//
6711// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6712// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6713// to consider shift amounts with defined behavior.
6714//
6715// The IsRotate flag should be set when the LHS of both shifts is the same.
6716// Otherwise if matching a general funnel shift, it should be clear.
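//
// Editorial example (illustrative): with EltSize == 32, Pos == y and
// Neg == (sub 32, y) pass the check, since Neg has the form (sub NegC, Pos)
// with NegC == EltSize; this is the plain
// (or (shl X, y), (srl X, (sub 32, y))) rotate idiom.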
6717static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6718 SelectionDAG &DAG, bool IsRotate) {
6719 // If EltSize is a power of 2 then:
6720 //
6721 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6722 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6723 //
6724 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6725 // for the stronger condition:
6726 //
6727 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6728 //
6729 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6730 // we can just replace Neg with Neg' for the rest of the function.
6731 //
6732 // In other cases we check for the even stronger condition:
6733 //
6734 // Neg == EltSize - Pos [B]
6735 //
6736 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6737 // behavior if Pos == 0 (and consequently Neg == EltSize).
6738 //
6739 // We could actually use [A] whenever EltSize is a power of 2, but the
6740 // only extra cases that it would match are those uninteresting ones
6741 // where Neg and Pos are never in range at the same time. E.g. for
6742 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6743 // as well as (sub 32, Pos), but:
6744 //
6745 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6746 //
6747 // always invokes undefined behavior for 32-bit X.
6748 //
6749 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6750 //
6751 // NOTE: We can only do this when matching an AND and not a general
6752 // funnel shift.
6753 unsigned MaskLoBits = 0;
6754 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6755 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6756 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6757 unsigned Bits = Log2_64(EltSize);
6758 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6759 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6760 Neg = Neg.getOperand(0);
6761 MaskLoBits = Bits;
6762 }
6763 }
6764 }
6765
6766 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6767 if (Neg.getOpcode() != ISD::SUB)
6768 return false;
6769 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6770 if (!NegC)
6771 return false;
6772 SDValue NegOp1 = Neg.getOperand(1);
6773
6774 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6775 // Pos'. The truncation is redundant for the purpose of the equality.
6776 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6777 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6778 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6779 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6780 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6781 MaskLoBits))
6782 Pos = Pos.getOperand(0);
6783 }
6784 }
6785
6786 // The condition we need is now:
6787 //
6788 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6789 //
6790 // If NegOp1 == Pos then we need:
6791 //
6792 // EltSize & Mask == NegC & Mask
6793 //
6794 // (because "x & Mask" is a truncation and distributes through subtraction).
6795 //
6796 // We also need to account for a potential truncation of NegOp1 if the amount
6797 // has already been legalized to a shift amount type.
6798 APInt Width;
6799 if ((Pos == NegOp1) ||
6800 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6801 Width = NegC->getAPIntValue();
6802
6803 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6804 // Then the condition we want to prove becomes:
6805 //
6806 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6807 //
6808 // which, again because "x & Mask" is a truncation, becomes:
6809 //
6810 // NegC & Mask == (EltSize - PosC) & Mask
6811 // EltSize & Mask == (NegC + PosC) & Mask
6812 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6813 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6814 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6815 else
6816 return false;
6817 } else
6818 return false;
6819
6820 // Now we just need to check that EltSize & Mask == Width & Mask.
6821 if (MaskLoBits)
6822 // EltSize & Mask is 0 since Mask is EltSize - 1.
6823 return Width.getLoBits(MaskLoBits) == 0;
6824 return Width == EltSize;
6825}
6826
6827// A subroutine of MatchRotate used once we have found an OR of two opposite
6828// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6829// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6830// former being preferred if supported. InnerPos and InnerNeg are Pos and
6831// Neg with outer conversions stripped away.
6832SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6833 SDValue Neg, SDValue InnerPos,
6834 SDValue InnerNeg, unsigned PosOpcode,
6835 unsigned NegOpcode, const SDLoc &DL) {
6836 // fold (or (shl x, (*ext y)),
6837 // (srl x, (*ext (sub 32, y)))) ->
6838 // (rotl x, y) or (rotr x, (sub 32, y))
6839 //
6840 // fold (or (shl x, (*ext (sub 32, y))),
6841 // (srl x, (*ext y))) ->
6842 // (rotr x, y) or (rotl x, (sub 32, y))
6843 EVT VT = Shifted.getValueType();
6844 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6845 /*IsRotate*/ true)) {
6846 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6847 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6848 HasPos ? Pos : Neg);
6849 }
6850
6851 return SDValue();
6852}
6853
6854// A subroutine of MatchRotate used once we have found an OR of two opposite
6855// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6856// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6857// former being preferred if supported. InnerPos and InnerNeg are Pos and
6858// Neg with outer conversions stripped away.
6859// TODO: Merge with MatchRotatePosNeg.
6860SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6861 SDValue Neg, SDValue InnerPos,
6862 SDValue InnerNeg, unsigned PosOpcode,
6863 unsigned NegOpcode, const SDLoc &DL) {
6864 EVT VT = N0.getValueType();
6865 unsigned EltBits = VT.getScalarSizeInBits();
6866
6867 // fold (or (shl x0, (*ext y)),
6868 // (srl x1, (*ext (sub 32, y)))) ->
6869 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6870 //
6871 // fold (or (shl x0, (*ext (sub 32, y))),
6872 // (srl x1, (*ext y))) ->
6873 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6874 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6875 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6876 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6877 HasPos ? Pos : Neg);
6878 }
6879
6880 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6881 // so for now just use the PosOpcode case if it's legal.
6882 // TODO: When can we use the NegOpcode case?
6883 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6884 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6885 if (Op.getOpcode() != BinOpc)
6886 return false;
6887 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6888 return Cst && (Cst->getAPIntValue() == Imm);
6889 };
6890
6891 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6892 // -> (fshl x0, x1, y)
6893 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6894 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6895 InnerPos == InnerNeg.getOperand(0) &&
6896 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6897 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6898 }
6899
6900 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6901 // -> (fshr x0, x1, y)
6902 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6903 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6904 InnerNeg == InnerPos.getOperand(0) &&
6905 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6906 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6907 }
6908
6909 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6910 // -> (fshr x0, x1, y)
6911 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6912 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6913 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6914 InnerNeg == InnerPos.getOperand(0) &&
6915 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6916 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6917 }
6918 }
6919
6920 return SDValue();
6921}
6922
6923// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6924// idioms for rotate, and if the target supports rotation instructions, generate
6925// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6926// with different shifted sources.
6927SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6928 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6929 EVT VT = LHS.getValueType();
6930 if (!TLI.isTypeLegal(VT))
6931 return SDValue();
6932
6933 // The target must have at least one rotate/funnel flavor.
6934 bool HasROTL = hasOperation(ISD::ROTL, VT);
6935 bool HasROTR = hasOperation(ISD::ROTR, VT);
6936 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6937 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6938 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6939 return SDValue();
6940
6941 // Check for truncated rotate.
6942 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6943 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6944 assert(LHS.getValueType() == RHS.getValueType());
6945 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6946 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6947 }
6948 }
6949
6950 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6951 SDValue LHSShift; // The shift.
6952 SDValue LHSMask; // AND value if any.
6953 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6954
6955 SDValue RHSShift; // The shift.
6956 SDValue RHSMask; // AND value if any.
6957 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6958
6959 // If neither side matched a rotate half, bail
6960 if (!LHSShift && !RHSShift)
6961 return SDValue();
6962
6963 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6964 // side of the rotate, so try to handle that here. In all cases we need to
6965 // pass the matched shift from the opposite side to compute the opcode and
6966 // needed shift amount to extract. We still want to do this if both sides
6967 // matched a rotate half because one half may be a potential overshift that
6968 // can be broken down (ie if InstCombine merged two shl or srl ops into a
6969 // single one).
6970
6971 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6972 if (LHSShift)
6973 if (SDValue NewRHSShift =
6974 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6975 RHSShift = NewRHSShift;
6976 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6977 if (RHSShift)
6978 if (SDValue NewLHSShift =
6979 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6980 LHSShift = NewLHSShift;
6981
6982 // If a side is still missing, nothing else we can do.
6983 if (!RHSShift || !LHSShift)
6984 return SDValue();
6985
6986 // At this point we've matched or extracted a shift op on each side.
6987
6988 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6989 return SDValue(); // Shifts must disagree.
6990
6991 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6992 if (!IsRotate && !(HasFSHL || HasFSHR))
6993 return SDValue(); // Requires funnel shift support.
6994
6995 // Canonicalize shl to left side in a shl/srl pair.
6996 if (RHSShift.getOpcode() == ISD::SHL) {
6997 std::swap(LHS, RHS);
6998 std::swap(LHSShift, RHSShift);
6999 std::swap(LHSMask, RHSMask);
7000 }
7001
7002 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7003 SDValue LHSShiftArg = LHSShift.getOperand(0);
7004 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7005 SDValue RHSShiftArg = RHSShift.getOperand(0);
7006 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7007
7008 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7009 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7010 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7011 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7012 // iff C1+C2 == EltSizeInBits
7013 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7014 ConstantSDNode *RHS) {
7015 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7016 };
7017 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7018 SDValue Res;
7019 if (IsRotate && (HasROTL || HasROTR))
7020 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7021 HasROTL ? LHSShiftAmt : RHSShiftAmt);
7022 else
7023 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7024 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7025
7026 // If there is an AND of either shifted operand, apply it to the result.
7027 if (LHSMask.getNode() || RHSMask.getNode()) {
7028 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7029 SDValue Mask = AllOnes;
7030
7031 if (LHSMask.getNode()) {
7032 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7033 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7034 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7035 }
7036 if (RHSMask.getNode()) {
7037 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7038 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7039 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7040 }
7041
7042 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7043 }
7044
7045 return Res;
7046 }
7047
7048 // If there is a mask here, and we have a variable shift, we can't be sure
7049 // that we're masking out the right stuff.
7050 if (LHSMask.getNode() || RHSMask.getNode())
7051 return SDValue();
7052
7053 // If the shift amount is sign/zext/any-extended just peel it off.
7054 SDValue LExtOp0 = LHSShiftAmt;
7055 SDValue RExtOp0 = RHSShiftAmt;
7056 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7057 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7058 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7059 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7060 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7061 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7062 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7063 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7064 LExtOp0 = LHSShiftAmt.getOperand(0);
7065 RExtOp0 = RHSShiftAmt.getOperand(0);
7066 }
7067
7068 if (IsRotate && (HasROTL || HasROTR)) {
7069 SDValue TryL =
7070 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7071 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7072 if (TryL)
7073 return TryL;
7074
7075 SDValue TryR =
7076 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7077 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7078 if (TryR)
7079 return TryR;
7080 }
7081
7082 SDValue TryL =
7083 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7084 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7085 if (TryL)
7086 return TryL;
7087
7088 SDValue TryR =
7089 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7090 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7091 if (TryR)
7092 return TryR;
7093
7094 return SDValue();
7095}
7096
7097namespace {
7098
7099/// Represents the known origin of an individual byte in a load combine pattern. The
7100/// value of the byte is either constant zero or comes from memory.
7101struct ByteProvider {
7102 // For constant zero providers Load is set to nullptr. For memory providers
7103 // Load represents the node which loads the byte from memory.
7104 // ByteOffset is the offset of the byte in the value produced by the load.
7105 LoadSDNode *Load = nullptr;
7106 unsigned ByteOffset = 0;
7107
7108 ByteProvider() = default;
7109
7110 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7111 return ByteProvider(Load, ByteOffset);
7112 }
7113
7114 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7115
7116 bool isConstantZero() const { return !Load; }
7117 bool isMemory() const { return Load; }
7118
7119 bool operator==(const ByteProvider &Other) const {
7120 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7121 }
7122
7123private:
7124 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7125 : Load(Load), ByteOffset(ByteOffset) {}
7126};
7127
7128} // end anonymous namespace
7129
7130/// Recursively traverses the expression calculating the origin of the requested
7131/// byte of the given value. Returns None if the provider can't be calculated.
7132///
7133/// For every value except the root of the expression, verifies that the value
7134/// has exactly one use; if not, returns None. This way, if the origin of the
7135/// byte is returned, it is guaranteed that the values which contribute to the
7136/// byte are not used outside of this expression.
7137///
7138/// Because the parts of the expression are not allowed to have more than one
7139/// use, this function iterates over trees, not DAGs, so it never visits the
7140/// same node more than once.
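///
/// Editorial example (illustrative): asking for byte 2 of (shl x, 16)
/// recurses into byte 0 of x, while bytes 0 and 1 come back as constant
/// zero, because the shift moves x up by two byte positions.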
7141static const Optional<ByteProvider>
7142calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7143 bool Root = false) {
7144 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7145 if (Depth == 10)
7146 return None;
7147
7148 if (!Root && !Op.hasOneUse())
7149 return None;
7150
7151 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7152 unsigned BitWidth = Op.getValueSizeInBits();
7153 if (BitWidth % 8 != 0)
7154 return None;
7155 unsigned ByteWidth = BitWidth / 8;
7156 assert(Index < ByteWidth && "invalid index requested");
7157 (void) ByteWidth;
7158
7159 switch (Op.getOpcode()) {
7160 case ISD::OR: {
7161 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7162 if (!LHS)
7163 return None;
7164 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7165 if (!RHS)
7166 return None;
7167
7168 if (LHS->isConstantZero())
7169 return RHS;
7170 if (RHS->isConstantZero())
7171 return LHS;
7172 return None;
7173 }
7174 case ISD::SHL: {
7175 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7176 if (!ShiftOp)
7177 return None;
7178
7179 uint64_t BitShift = ShiftOp->getZExtValue();
7180 if (BitShift % 8 != 0)
7181 return None;
7182 uint64_t ByteShift = BitShift / 8;
7183
7184 return Index < ByteShift
7185 ? ByteProvider::getConstantZero()
7186 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7187 Depth + 1);
7188 }
7189 case ISD::ANY_EXTEND:
7190 case ISD::SIGN_EXTEND:
7191 case ISD::ZERO_EXTEND: {
7192 SDValue NarrowOp = Op->getOperand(0);
7193 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7194 if (NarrowBitWidth % 8 != 0)
7195 return None;
7196 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7197
7198 if (Index >= NarrowByteWidth)
7199 return Op.getOpcode() == ISD::ZERO_EXTEND
7200 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7201 : None;
7202 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7203 }
7204 case ISD::BSWAP:
7205 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7206 Depth + 1);
7207 case ISD::LOAD: {
7208 auto L = cast<LoadSDNode>(Op.getNode());
7209 if (!L->isSimple() || L->isIndexed())
7210 return None;
7211
7212 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7213 if (NarrowBitWidth % 8 != 0)
7214 return None;
7215 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7216
7217 if (Index >= NarrowByteWidth)
7218 return L->getExtensionType() == ISD::ZEXTLOAD
7219 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7220 : None;
7221 return ByteProvider::getMemory(L, Index);
7222 }
7223 }
7224
7225 return None;
7226}
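// Illustrative sketch, not part of DAGCombiner.cpp: the ISD::SHL case of
// calculateByteProvider above says that byte Index of (x << (8 * ByteShift))
// is constant zero for Index < ByteShift and otherwise comes from byte
// (Index - ByteShift) of x. A minimal standalone check of that arithmetic
// (byteOf is an ad-hoc helper, not an LLVM API):
#include <cassert>
#include <cstdint>

static uint8_t byteOf(uint32_t V, unsigned Index) {
  return static_cast<uint8_t>(V >> (8 * Index));
}

int main() {
  uint32_t X = 0xAABBCCDD;
  unsigned ByteShift = 2;                  // a shift by 16 bits
  uint32_t Shifted = X << (8 * ByteShift); // 0xCCDD0000
  for (unsigned Index = 0; Index < 4; ++Index) {
    uint8_t Expected = Index < ByteShift ? 0 : byteOf(X, Index - ByteShift);
    assert(byteOf(Shifted, Index) == Expected);
  }
  return 0;
}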
7227
7228static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7229 return i;
7230}
7231
7232static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7233 return BW - i - 1;
7234}
7235
7236// Check if the byte offsets we are looking at match either a big or little
7237// endian value load. Return true for big endian, false for little endian,
7238// and None if the match failed.
7239static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7240 int64_t FirstOffset) {
7241 // The endianness can only be decided when there are at least 2 bytes.
7242 unsigned Width = ByteOffsets.size();
7243 if (Width < 2)
7244 return None;
7245
7246 bool BigEndian = true, LittleEndian = true;
7247 for (unsigned i = 0; i < Width; i++) {
7248 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7249 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7250 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7251 if (!BigEndian && !LittleEndian)
7252 return None;
7253 }
7254
7255 assert((BigEndian != LittleEndian) && "It should be either big endian or"
7256 "little endian");
7257 return BigEndian;
7258}
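// Illustrative sketch, not part of DAGCombiner.cpp: a standalone version of
// the decision isBigEndian() makes above, run on two hand-written offset
// patterns for a 4-byte value (FirstOffset assumed to be 0; decideBigEndian
// is an ad-hoc helper, not an LLVM API):
#include <cassert>
#include <vector>

static bool decideBigEndian(const std::vector<long> &Offsets) {
  unsigned Width = Offsets.size();
  bool Big = true, Little = true;
  for (unsigned i = 0; i < Width; ++i) {
    Little &= Offsets[i] == (long)i;            // littleEndianByteAt(Width, i)
    Big &= Offsets[i] == (long)(Width - i - 1); // bigEndianByteAt(Width, i)
  }
  assert(Big != Little && "ambiguous pattern not expected in this demo");
  return Big;
}

int main() {
  assert(!decideBigEndian({0, 1, 2, 3})); // matches a little-endian value load
  assert(decideBigEndian({3, 2, 1, 0}));  // matches a big-endian value load
  return 0;
}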
7259
7260static SDValue stripTruncAndExt(SDValue Value) {
7261 switch (Value.getOpcode()) {
7262 case ISD::TRUNCATE:
7263 case ISD::ZERO_EXTEND:
7264 case ISD::SIGN_EXTEND:
7265 case ISD::ANY_EXTEND:
7266 return stripTruncAndExt(Value.getOperand(0));
7267 }
7268 return Value;
7269}
7270
7271/// Match a pattern where a wide type scalar value is stored by several narrow
7272/// stores. Fold it into a single store or a BSWAP and a store if the target
7273/// supports it.
7274///
7275/// Assuming little endian target:
7276/// i8 *p = ...
7277/// i32 val = ...
7278/// p[0] = (val >> 0) & 0xFF;
7279/// p[1] = (val >> 8) & 0xFF;
7280/// p[2] = (val >> 16) & 0xFF;
7281/// p[3] = (val >> 24) & 0xFF;
7282/// =>
7283/// *((i32)p) = val;
7284///
7285/// i8 *p = ...
7286/// i32 val = ...
7287/// p[0] = (val >> 24) & 0xFF;
7288/// p[1] = (val >> 16) & 0xFF;
7289/// p[2] = (val >> 8) & 0xFF;
7290/// p[3] = (val >> 0) & 0xFF;
7291/// =>
7292/// *((i32)p) = BSWAP(val);
7293SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7294 // The matching looks for "store (trunc x)" patterns that appear early but are
7295 // likely to be replaced by truncating store nodes during combining.
7296 // TODO: If there is evidence that running this later would help, this
7297 // limitation could be removed. Legality checks may need to be added
7298 // for the created store and optional bswap/rotate.
7299 if (LegalOperations)
7300 return SDValue();
7301
7302 // We only handle merging simple stores of 1-4 bytes.
7303 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7304 EVT MemVT = N->getMemoryVT();
7305 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7306 !N->isSimple() || N->isIndexed())
7307 return SDValue();
7308
7309 // Collect all of the stores in the chain.
7310 SDValue Chain = N->getChain();
7311 SmallVector<StoreSDNode *, 8> Stores = {N};
7312 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7313 // All stores must be the same size to ensure that we are writing all of the
7314 // bytes in the wide value.
7315 // TODO: We could allow multiple sizes by tracking each stored byte.
7316 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7317 Store->isIndexed())
7318 return SDValue();
7319 Stores.push_back(Store);
7320 Chain = Store->getChain();
7321 }
7322 // There is no reason to continue if we do not have at least a pair of stores.
7323 if (Stores.size() < 2)
7324 return SDValue();
7325
7326 // Handle simple types only.
7327 LLVMContext &Context = *DAG.getContext();
7328 unsigned NumStores = Stores.size();
7329 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7330 unsigned WideNumBits = NumStores * NarrowNumBits;
7331 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7332 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7333 return SDValue();
7334
7335 // Check if all bytes of the source value that we are looking at are stored
7336 // to the same base address. Collect offsets from Base address into OffsetMap.
7337 SDValue SourceValue;
7338 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7339 int64_t FirstOffset = INT64_MAX;
7340 StoreSDNode *FirstStore = nullptr;
7341 Optional<BaseIndexOffset> Base;
7342 for (auto Store : Stores) {
7343 // All the stores store different parts of the CombinedValue. A truncate is
7344 // required to get the partial value.
7345 SDValue Trunc = Store->getValue();
7346 if (Trunc.getOpcode() != ISD::TRUNCATE)
7347 return SDValue();
7348 // Other than the first/last part, a shift operation is required to get the
7349 // offset.
7350 int64_t Offset = 0;
7351 SDValue WideVal = Trunc.getOperand(0);
7352 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7353 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7354 // The shift amount must be a constant multiple of the narrow type.
7355 // It is translated to the offset address in the wide source value "y".
7356 //
7357 // x = srl y, ShiftAmtC
7358 // i8 z = trunc x
7359 // store z, ...
7360 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7361 if (ShiftAmtC % NarrowNumBits != 0)
7362 return SDValue();
7363
7364 Offset = ShiftAmtC / NarrowNumBits;
7365 WideVal = WideVal.getOperand(0);
7366 }
7367
7368 // Stores must share the same source value with different offsets.
7369 // Truncates and extends should be stripped to get the single source value.
7370 if (!SourceValue)
7371 SourceValue = WideVal;
7372 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7373 return SDValue();
7374 else if (SourceValue.getValueType() != WideVT) {
7375 if (WideVal.getValueType() == WideVT ||
7376 WideVal.getScalarValueSizeInBits() >
7377 SourceValue.getScalarValueSizeInBits())
7378 SourceValue = WideVal;
7379 // Give up if the source value type is smaller than the store size.
7380 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7381 return SDValue();
7382 }
7383
7384 // Stores must share the same base address.
7385 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7386 int64_t ByteOffsetFromBase = 0;
7387 if (!Base)
7388 Base = Ptr;
7389 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7390 return SDValue();
7391
7392 // Remember the first store.
7393 if (ByteOffsetFromBase < FirstOffset) {
7394 FirstStore = Store;
7395 FirstOffset = ByteOffsetFromBase;
7396 }
7397 // Map the offset in the store and the offset in the combined value, and
7398 // early return if it has been set before.
7399 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7400 return SDValue();
7401 OffsetMap[Offset] = ByteOffsetFromBase;
7402 }
7403
7404 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7405 assert(FirstStore && "First store must be set");
7406
7407 // Check that a store of the wide type is both allowed and fast on the target
7408 const DataLayout &Layout = DAG.getDataLayout();
7409 bool Fast = false;
7410 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7411 *FirstStore->getMemOperand(), &Fast);
7412 if (!Allowed || !Fast)
7413 return SDValue();
7414
7415 // Check if the pieces of the value are going to the expected places in memory
7416 // to merge the stores.
7417 auto checkOffsets = [&](bool MatchLittleEndian) {
7418 if (MatchLittleEndian) {
7419 for (unsigned i = 0; i != NumStores; ++i)
7420 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7421 return false;
7422 } else { // MatchBigEndian by reversing loop counter.
7423 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7424 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7425 return false;
7426 }
7427 return true;
7428 };
7429
7430 // Check if the offsets line up for the native data layout of this target.
7431 bool NeedBswap = false;
7432 bool NeedRotate = false;
7433 if (!checkOffsets(Layout.isLittleEndian())) {
7434 // Special-case: check if byte offsets line up for the opposite endian.
7435 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7436 NeedBswap = true;
7437 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7438 NeedRotate = true;
7439 else
7440 return SDValue();
7441 }
7442
7443 SDLoc DL(N);
7444 if (WideVT != SourceValue.getValueType()) {
7445 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7446 "Unexpected store value to merge");
7447 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7448 }
7449
7450 // Before legalize we can introduce illegal bswaps/rotates which will be later
7451 // converted to an explicit bswap sequence. This way we end up with a single
7452 // store and byte shuffling instead of several stores and byte shuffling.
7453 if (NeedBswap) {
7454 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7455 } else if (NeedRotate) {
7456 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7457 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7458 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7459 }
7460
7461 SDValue NewStore =
7462 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7463 FirstStore->getPointerInfo(), FirstStore->getAlign());
7464
7465 // Rely on other DAG combine rules to remove the other individual stores.
7466 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7467 return NewStore;
7468}
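// Illustrative sketch, not part of DAGCombiner.cpp: the source-level pattern
// mergeTruncStores targets. Storing the four bytes of an i32 with shifts and
// truncating stores writes the same bytes as one wide store on a matching
// endian host; on the opposite layout the combine has to insert a BSWAP (or a
// rotate for two-part stores) instead. Standalone demo, no LLVM APIs:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Val = 0x11223344;

  unsigned char Narrow[4];
  for (unsigned i = 0; i != 4; ++i)
    Narrow[i] = (unsigned char)(Val >> (8 * i)); // p[i] = (val >> 8*i) & 0xFF

  unsigned char Wide[4];
  std::memcpy(Wide, &Val, 4);                    // the single merged i32 store

  uint16_t Probe = 1;
  bool LittleEndianHost = *reinterpret_cast<unsigned char *>(&Probe) == 1;
  if (LittleEndianHost)
    assert(std::memcmp(Narrow, Wide, 4) == 0);   // plain wide store suffices
  else
    assert(Narrow[0] == Wide[3] && Narrow[3] == Wide[0]); // needs a BSWAP
  return 0;
}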
7469
7470/// Match a pattern where a wide type scalar value is loaded by several narrow
7471/// loads and combined by shifts and ors. Fold it into a single load or a load
7472/// and a BSWAP if the target supports it.
7473///
7474/// Assuming little endian target:
7475/// i8 *a = ...
7476/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7477/// =>
7478/// i32 val = *((i32)a)
7479///
7480/// i8 *a = ...
7481/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7482/// =>
7483/// i32 val = BSWAP(*((i32)a))
7484///
7485/// TODO: This rule matches complex patterns with OR node roots and doesn't
7486/// interact well with the worklist mechanism. When a part of the pattern is
7487/// updated (e.g. one of the loads) its direct users are put into the worklist,
7488/// but the root node of the pattern which triggers the load combine is not
7489/// necessarily a direct user of the changed node. For example, once the address
7490/// of the t28 load is reassociated, the load combine won't be triggered:
7491/// t25: i32 = add t4, Constant:i32<2>
7492/// t26: i64 = sign_extend t25
7493/// t27: i64 = add t2, t26
7494/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7495/// t29: i32 = zero_extend t28
7496/// t32: i32 = shl t29, Constant:i8<8>
7497/// t33: i32 = or t23, t32
7498/// As a possible fix visitLoad can check if the load can be a part of a load
7499/// combine pattern and add corresponding OR roots to the worklist.
7500SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7501 assert(N->getOpcode() == ISD::OR &&
7502 "Can only match load combining against OR nodes");
7503
7504 // Handles simple types only
7505 EVT VT = N->getValueType(0);
7506 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7507 return SDValue();
7508 unsigned ByteWidth = VT.getSizeInBits() / 8;
7509
7510 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7511 auto MemoryByteOffset = [&] (ByteProvider P) {
7512 assert(P.isMemory() && "Must be a memory byte provider");
7513 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7514 assert(LoadBitWidth % 8 == 0 &&
7515 "can only analyze providers for individual bytes not bit");
7516 unsigned LoadByteWidth = LoadBitWidth / 8;
7517 return IsBigEndianTarget
7518 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7519 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7520 };
7521
7522 Optional<BaseIndexOffset> Base;
7523 SDValue Chain;
7524
7525 SmallPtrSet<LoadSDNode *, 8> Loads;
7526 Optional<ByteProvider> FirstByteProvider;
7527 int64_t FirstOffset = INT64_MAX;
7528
7529 // Check if all the bytes of the OR we are looking at are loaded from the same
7530 // base address. Collect byte offsets from the Base address in ByteOffsets.
7531 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7532 unsigned ZeroExtendedBytes = 0;
7533 for (int i = ByteWidth - 1; i >= 0; --i) {
7534 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7535 if (!P)
7536 return SDValue();
7537
7538 if (P->isConstantZero()) {
7539 // It's OK for the N most significant bytes to be 0, we can just
7540 // zero-extend the load.
7541 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7542 return SDValue();
7543 continue;
7544 }
7545 assert(P->isMemory() && "provenance should either be memory or zero");
7546
7547 LoadSDNode *L = P->Load;
7548 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7549 !L->isIndexed() &&
7550 "Must be enforced by calculateByteProvider");
7551 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7552
7553 // All loads must share the same chain
7554 SDValue LChain = L->getChain();
7555 if (!Chain)
7556 Chain = LChain;
7557 else if (Chain != LChain)
7558 return SDValue();
7559
7560 // Loads must share the same base address
7561 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7562 int64_t ByteOffsetFromBase = 0;
7563 if (!Base)
7564 Base = Ptr;
7565 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7566 return SDValue();
7567
7568 // Calculate the offset of the current byte from the base address
7569 ByteOffsetFromBase += MemoryByteOffset(*P);
7570 ByteOffsets[i] = ByteOffsetFromBase;
7571
7572 // Remember the first byte load
7573 if (ByteOffsetFromBase < FirstOffset) {
7574 FirstByteProvider = P;
7575 FirstOffset = ByteOffsetFromBase;
7576 }
7577
7578 Loads.insert(L);
7579 }
7580 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7581 "memory, so there must be at least one load which produces the value");
7582 assert(Base && "Base address of the accessed memory location must be set");
7583 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7584
7585 bool NeedsZext = ZeroExtendedBytes > 0;
7586
7587 EVT MemVT =
7588 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7589
7590 if (!MemVT.isSimple())
7591 return SDValue();
7592
7593 // Before legalize we can introduce too wide illegal loads which will be later
7594 // split into legal sized loads. This enables us to combine i64 load by i8
7595 // patterns to a couple of i32 loads on 32 bit targets.
7596 if (LegalOperations &&
7597 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7598 MemVT))
7599 return SDValue();
7600
7601 // Check if the bytes of the OR we are looking at match with either big or
7602 // little endian value load
7603 Optional<bool> IsBigEndian = isBigEndian(
7604 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7605 if (!IsBigEndian.hasValue())
7606 return SDValue();
7607
7608 assert(FirstByteProvider && "must be set");
7609
7610 // Ensure that the first byte is loaded from offset zero of the first load,
7611 // so the combined value can be loaded from the first load's address.
7612 if (MemoryByteOffset(*FirstByteProvider) != 0)
7613 return SDValue();
7614 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7615
7616 // The node we are looking at matches with the pattern, check if we can
7617 // replace it with a single (possibly zero-extended) load and bswap + shift if
7618 // needed.
7619
7620 // If the load needs byte swap check if the target supports it
7621 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7622
7623 // Before legalize we can introduce illegal bswaps which will be later
7624 // converted to an explicit bswap sequence. This way we end up with a single
7625 // load and byte shuffling instead of several loads and byte shuffling.
7626 // We do not introduce illegal bswaps when zero-extending as this tends to
7627 // introduce too many arithmetic instructions.
7628 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7629 !TLI.isOperationLegal(ISD::BSWAP, VT))
7630 return SDValue();
7631
7632 // If we need to bswap and zero extend, we have to insert a shift. Check that
7633 // it is legal.
7634 if (NeedsBswap && NeedsZext && LegalOperations &&
7635 !TLI.isOperationLegal(ISD::SHL, VT))
7636 return SDValue();
7637
7638 // Check that a load of the wide type is both allowed and fast on the target
7639 bool Fast = false;
7640 bool Allowed =
7641 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7642 *FirstLoad->getMemOperand(), &Fast);
7643 if (!Allowed || !Fast)
7644 return SDValue();
7645
7646 SDValue NewLoad =
7647 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7648 Chain, FirstLoad->getBasePtr(),
7649 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7650
7651 // Transfer chain users from old loads to the new load.
7652 for (LoadSDNode *L : Loads)
7653 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7654
7655 if (!NeedsBswap)
7656 return NewLoad;
7657
7658 SDValue ShiftedLoad =
7659 NeedsZext
7660 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7661 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7662 SDLoc(N), LegalOperations))
7663 : NewLoad;
7664 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7665}
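// Illustrative sketch, not part of DAGCombiner.cpp: the source-level pattern
// MatchLoadCombine folds. Assembling an i32 from four byte loads with shifts
// and ORs reads the same value as one i32 load on a little-endian host; with
// the opposite byte order the wide load additionally needs a BSWAP. The
// bswap32 helper is ad hoc, not an LLVM API:
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  const unsigned char A[4] = {0x44, 0x33, 0x22, 0x11};

  uint32_t ByByte = (uint32_t)A[0] | ((uint32_t)A[1] << 8) |
                    ((uint32_t)A[2] << 16) | ((uint32_t)A[3] << 24);

  uint32_t Wide;
  std::memcpy(&Wide, A, 4); // the single merged i32 load

  uint16_t Probe = 1;
  bool LittleEndianHost = *reinterpret_cast<unsigned char *>(&Probe) == 1;
  if (LittleEndianHost)
    assert(ByByte == Wide);          // plain wide load suffices
  else
    assert(ByByte == bswap32(Wide)); // wide load + BSWAP
  return 0;
}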
7666
7667// If the target has andn, bsl, or a similar bit-select instruction,
7668// we want to unfold masked merge, with canonical pattern of:
7669//  |     A     |  |B|
7670//  ((x ^ y) & m) ^ y
7671//   |  D  |
7672// Into:
7673// (x & m) | (y & ~m)
7674// If y is a constant, and the 'andn' does not work with immediates,
7675// we unfold into a different pattern:
7676// ~(~x & m) & (m | y)
7677// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7678// the very least that breaks andnpd / andnps patterns, and because those
7679// patterns are simplified in IR and shouldn't be created in the DAG
7680SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7681 assert(N->getOpcode() == ISD::XOR);
7682
7683 // Don't touch 'not' (i.e. where y = -1).
7684 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7685 return SDValue();
7686
7687 EVT VT = N->getValueType(0);
7688
7689 // There are 3 commutable operators in the pattern,
7690 // so we have to deal with 8 possible variants of the basic pattern.
7691 SDValue X, Y, M;
7692 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7693 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7694 return false;
7695 SDValue Xor = And.getOperand(XorIdx);
7696 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7697 return false;
7698 SDValue Xor0 = Xor.getOperand(0);
7699 SDValue Xor1 = Xor.getOperand(1);
7700 // Don't touch 'not' (i.e. where y = -1).
7701 if (isAllOnesOrAllOnesSplat(Xor1))
7702 return false;
7703 if (Other == Xor0)
7704 std::swap(Xor0, Xor1);
7705 if (Other != Xor1)
7706 return false;
7707 X = Xor0;
7708 Y = Xor1;
7709 M = And.getOperand(XorIdx ? 0 : 1);
7710 return true;
7711 };
7712
7713 SDValue N0 = N->getOperand(0);
7714 SDValue N1 = N->getOperand(1);
7715 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7716 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7717 return SDValue();
7718
7719 // Don't do anything if the mask is constant. This should not be reachable.
7720 // InstCombine should have already unfolded this pattern, and DAGCombiner
7721 // probably shouldn't produce it, either.
7722 if (isa<ConstantSDNode>(M.getNode()))
7723 return SDValue();
7724
7725 // We can transform if the target has AndNot
7726 if (!TLI.hasAndNot(M))
7727 return SDValue();
7728
7729 SDLoc DL(N);
7730
7731 // If Y is a constant, check that 'andn' works with immediates.
7732 if (!TLI.hasAndNot(Y)) {
7733 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7734 // If not, we need to do a bit more work to make sure andn is still used.
7735 SDValue NotX = DAG.getNOT(DL, X, VT);
7736 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7737 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7738 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7739 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7740 }
7741
7742 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7743 SDValue NotM = DAG.getNOT(DL, M, VT);
7744 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7745
7746 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7747}
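// Illustrative sketch, not part of DAGCombiner.cpp: an exhaustive 8-bit check
// of the two rewrites used by unfoldMaskedMerge above,
//   ((x ^ y) & m) ^ y  ==  (x & m) | (y & ~m)
// and, for the constant-y path where 'andn' cannot take an immediate,
//   ((x ^ y) & m) ^ y  ==  ~(~x & m) & (m | y).
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned m = 0; m < 256; ++m) {
        uint8_t Merge = (uint8_t)(((x ^ y) & m) ^ y);
        uint8_t Unfolded = (uint8_t)((x & m) | (y & ~m));
        uint8_t ImmForm = (uint8_t)(~(~x & m) & (m | y));
        assert(Merge == Unfolded);
        assert(Merge == ImmForm);
      }
  return 0;
}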
7748
7749SDValue DAGCombiner::visitXOR(SDNode *N) {
7750 SDValue N0 = N->getOperand(0);
7751 SDValue N1 = N->getOperand(1);
7752 EVT VT = N0.getValueType();
7753
7754 // fold vector ops
7755 if (VT.isVector()) {
7756 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7757 return FoldedVOp;
7758
7759 // fold (xor x, 0) -> x, vector edition
7760 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7761 return N1;
7762 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7763 return N0;
7764 }
7765
7766 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7767 SDLoc DL(N);
7768 if (N0.isUndef() && N1.isUndef())
7769 return DAG.getConstant(0, DL, VT);
7770
7771 // fold (xor x, undef) -> undef
7772 if (N0.isUndef())
7773 return N0;
7774 if (N1.isUndef())
7775 return N1;
7776
7777 // fold (xor c1, c2) -> c1^c2
7778 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7779 return C;
7780
7781 // canonicalize constant to RHS
7782 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7783 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7784 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7785
7786 // fold (xor x, 0) -> x
7787 if (isNullConstant(N1))
7788 return N0;
7789
7790 if (SDValue NewSel = foldBinOpIntoSelect(N))
7791 return NewSel;
7792
7793 // reassociate xor
7794 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7795 return RXOR;
7796
7797 // fold !(x cc y) -> (x !cc y)
7798 unsigned N0Opcode = N0.getOpcode();
7799 SDValue LHS, RHS, CC;
7800 if (TLI.isConstTrueVal(N1.getNode()) &&
7801 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7802 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7803 LHS.getValueType());
7804 if (!LegalOperations ||
7805 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7806 switch (N0Opcode) {
7807 default:
7808 llvm_unreachable("Unhandled SetCC Equivalent!");
7809 case ISD::SETCC:
7810 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7811 case ISD::SELECT_CC:
7812 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7813 N0.getOperand(3), NotCC);
7814 case ISD::STRICT_FSETCC:
7815 case ISD::STRICT_FSETCCS: {
7816 if (N0.hasOneUse()) {
7817 // FIXME Can we handle multiple uses? Could we token factor the chain
7818 // results from the new/old setcc?
7819 SDValue SetCC =
7820 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7821 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7822 CombineTo(N, SetCC);
7823 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7824 recursivelyDeleteUnusedNodes(N0.getNode());
7825 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7826 }
7827 break;
7828 }
7829 }
7830 }
7831 }
7832
7833 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7834 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7835 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7836 SDValue V = N0.getOperand(0);
7837 SDLoc DL0(N0);
7838 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7839 DAG.getConstant(1, DL0, V.getValueType()));
7840 AddToWorklist(V.getNode());
7841 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7842 }
7843
7844 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7845 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7846 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7847 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7848 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7849 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7850 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7851 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7852 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7853 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7854 }
7855 }
7856 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7857 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7858 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7859 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7860 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7861 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7862 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7863 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7864 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7865 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7866 }
7867 }
7868
7869 // fold (not (neg x)) -> (add X, -1)
7870 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7871 // Y is a constant or the subtract has a single use.
7872 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7873 isNullConstant(N0.getOperand(0))) {
7874 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7875 DAG.getAllOnesConstant(DL, VT));
7876 }
7877
7878 // fold (not (add X, -1)) -> (neg X)
7879 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7880 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7881 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7882 N0.getOperand(0));
7883 }
7884
7885 // fold (xor (and x, y), y) -> (and (not x), y)
7886 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7887 SDValue X = N0.getOperand(0);
7888 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7889 AddToWorklist(NotX.getNode());
7890 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7891 }
7892
7893 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7894 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7895 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7896 unsigned BitWidth = VT.getScalarSizeInBits();
7897 if (XorC && ShiftC) {
7898 // Don't crash on an oversized shift. We cannot guarantee that a bogus
7899 // shift has been simplified to undef.
7900 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7901 if (ShiftAmt < BitWidth) {
7902 APInt Ones = APInt::getAllOnesValue(BitWidth);
7903 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7904 if (XorC->getAPIntValue() == Ones) {
7905 // If the xor constant is a shifted -1, do a 'not' before the shift:
7906 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7907 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7908 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7909 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7910 }
7911 }
7912 }
7913 }
7914
7915 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7916 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7917 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7918 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7919 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7920 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7921 SDValue S0 = S.getOperand(0);
7922 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7923 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7924 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7925 return DAG.getNode(ISD::ABS, DL, VT, S0);
7926 }
7927 }
7928
7929 // fold (xor x, x) -> 0
7930 if (N0 == N1)
7931 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7932
7933 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7934 // Here is a concrete example of this equivalence:
7935 // i16 x == 14
7936 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7937 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7938 //
7939 // =>
7940 //
7941 // i16 ~1 == 0b1111111111111110
7942 // i16 rol(~1, 14) == 0b1011111111111111
7943 //
7944 // Some additional tips to help conceptualize this transform:
7945 // - Try to see the operation as placing a single zero in a value of all ones.
7946 // - There exists no value for x which would allow the result to contain zero.
7947 // - Values of x larger than the bitwidth are undefined and do not require a
7948 // consistent result.
7949 // - Pushing the zero left requires shifting 'one' bits in from the right.
7950 // A rotate left of ~1 is a nice way of achieving the desired result.
7951 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7952 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7953 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7954 N0.getOperand(1));
7955 }
7956
7957 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7958 if (N0Opcode == N1.getOpcode())
7959 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7960 return V;
7961
7962 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7963 if (SDValue MM = unfoldMaskedMerge(N))
7964 return MM;
7965
7966 // Simplify the expression using non-local knowledge.
7967 if (SimplifyDemandedBits(SDValue(N, 0)))
7968 return SDValue(N, 0);
7969
7970 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7971 return Combined;
7972
7973 return SDValue();
7974}
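// Illustrative sketch, not part of DAGCombiner.cpp: the abs fold recognized in
// visitXOR above. With Y = sra(X, 31), (X + Y) ^ Y equals |X| for i32, which
// is why the pattern is rewritten to ISD::ABS when that node is legal. The
// demo assumes the usual arithmetic behavior of >> on negative signed values:
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  const int32_t Tests[] = {0, 1, -1, 123456, -123456, INT32_MIN + 1};
  for (int32_t X : Tests) {
    int32_t Y = X >> 31;       // Y = sra(X, size(X)-1): 0 or -1
    int32_t Abs = (X + Y) ^ Y; // xor (add X, Y), Y
    assert(Abs == std::abs(X));
  }
  return 0;
}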
7975
7976/// If we have a shift-by-constant of a bitwise logic op that itself has a
7977/// shift-by-constant operand with identical opcode, we may be able to convert
7978/// that into 2 independent shifts followed by the logic op. This is a
7979/// throughput improvement.
7980static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7981 // Match a one-use bitwise logic op.
7982 SDValue LogicOp = Shift->getOperand(0);
7983 if (!LogicOp.hasOneUse())
7984 return SDValue();
7985
7986 unsigned LogicOpcode = LogicOp.getOpcode();
7987 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7988 LogicOpcode != ISD::XOR)
7989 return SDValue();
7990
7991 // Find a matching one-use shift by constant.
7992 unsigned ShiftOpcode = Shift->getOpcode();
7993 SDValue C1 = Shift->getOperand(1);
7994 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7995 assert(C1Node && "Expected a shift with constant operand");
7996 const APInt &C1Val = C1Node->getAPIntValue();
7997 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7998 const APInt *&ShiftAmtVal) {
7999 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8000 return false;
8001
8002 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8003 if (!ShiftCNode)
8004 return false;
8005
8006 // Capture the shifted operand and shift amount value.
8007 ShiftOp = V.getOperand(0);
8008 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8009
8010 // Shift amount types do not have to match their operand type, so check that
8011 // the constants are the same width.
8012 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8013 return false;
8014
8015 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8016 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8017 return false;
8018
8019 return true;
8020 };
8021
8022 // Logic ops are commutative, so check each operand for a match.
8023 SDValue X, Y;
8024 const APInt *C0Val;
8025 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8026 Y = LogicOp.getOperand(1);
8027 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8028 Y = LogicOp.getOperand(0);
8029 else
8030 return SDValue();
8031
8032 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8033 SDLoc DL(Shift);
8034 EVT VT = Shift->getValueType(0);
8035 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8036 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8037 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8038 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8039 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8040}
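// Illustrative sketch, not part of DAGCombiner.cpp: the rewrite performed by
// combineShiftOfShiftedLogic above,
//   shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
// checked here for SHL/AND and SRL/AND on 32-bit values (C0 + C1 must stay
// below the bit width, which matchFirstShift enforces):
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678, Y = 0x0F0F0F0F;
  unsigned C0 = 3, C1 = 5;

  assert((((X << C0) & Y) << C1) == ((X << (C0 + C1)) & (Y << C1)));
  assert((((X >> C0) & Y) >> C1) == ((X >> (C0 + C1)) & (Y >> C1)));
  return 0;
}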
8041
8042/// Handle transforms common to the three shifts, when the shift amount is a
8043/// constant.
8044/// We are looking for: (shift being one of shl/sra/srl)
8045/// shift (binop X, C0), C1
8046/// And want to transform into:
8047/// binop (shift X, C1), (shift C0, C1)
8048SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8049 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8050
8051 // Do not turn a 'not' into a regular xor.
8052 if (isBitwiseNot(N->getOperand(0)))
8053 return SDValue();
8054
8055 // The inner binop must be one-use, since we want to replace it.
8056 SDValue LHS = N->getOperand(0);
8057 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8058 return SDValue();
8059
8060 // TODO: This is limited to early combining because it may reveal regressions
8061 // otherwise. But since we just checked a target hook to see if this is
8062 // desirable, that should have filtered out cases where this interferes
8063 // with some other pattern matching.
8064 if (!LegalTypes)
8065 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8066 return R;
8067
8068 // We want to pull some binops through shifts, so that we have (and (shift))
8069 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8070 // thing happens with address calculations, so it's important to canonicalize
8071 // it.
8072 switch (LHS.getOpcode()) {
8073 default:
8074 return SDValue();
8075 case ISD::OR:
8076 case ISD::XOR:
8077 case ISD::AND:
8078 break;
8079 case ISD::ADD:
8080 if (N->getOpcode() != ISD::SHL)
8081 return SDValue(); // only shl(add) not sr[al](add).
8082 break;
8083 }
8084
8085 // We require the RHS of the binop to be a constant and not opaque as well.
8086 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8087 if (!BinOpCst)
8088 return SDValue();
8089
8090 // FIXME: disable this unless the input to the binop is a shift by a constant
8091 // or is copy/select. Enable this in other cases once we figure out when
8092 // it's exactly profitable.
8093 SDValue BinOpLHSVal = LHS.getOperand(0);
8094 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8095 BinOpLHSVal.getOpcode() == ISD::SRA ||
8096 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8097 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8098 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8099 BinOpLHSVal.getOpcode() == ISD::SELECT;
8100
8101 if (!IsShiftByConstant && !IsCopyOrSelect)
8102 return SDValue();
8103
8104 if (IsCopyOrSelect && N->hasOneUse())
8105 return SDValue();
8106
8107 // Fold the constants, shifting the binop RHS by the shift amount.
8108 SDLoc DL(N);
8109 EVT VT = N->getValueType(0);
8110 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8111 N->getOperand(1));
8112 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8113
8114 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8115 N->getOperand(1));
8116 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8117}
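// Illustrative sketch, not part of DAGCombiner.cpp: the canonicalization done
// by visitShiftByConstant above, shift (binop X, C0), C1 ->
// binop (shift X, C1), (shift C0, C1), checked for AND/OR/XOR with SRL and for
// the ADD case, which is only valid with SHL (the low bits an SRL/SRA would
// drop still feed carries into the kept bits):
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, C0 = 0x00FF00FF;
  unsigned C1 = 4;

  assert(((X & C0) >> C1) == ((X >> C1) & (C0 >> C1)));
  assert(((X | C0) >> C1) == ((X >> C1) | (C0 >> C1)));
  assert(((X ^ C0) >> C1) == ((X >> C1) ^ (C0 >> C1)));

  // ADD distributes over SHL (modulo 2^32), hence the ISD::SHL restriction.
  assert(((X + C0) << C1) == ((X << C1) + (C0 << C1)));
  return 0;
}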
8118
8119SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8120 assert(N->getOpcode() == ISD::TRUNCATE);
8121 assert(N->getOperand(0).getOpcode() == ISD::AND);
8122
8123 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8124 EVT TruncVT = N->getValueType(0);
8125 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8126 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8127 SDValue N01 = N->getOperand(0).getOperand(1);
8128 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8129 SDLoc DL(N);
8130 SDValue N00 = N->getOperand(0).getOperand(0);
8131 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8132 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8133 AddToWorklist(Trunc00.getNode());
8134 AddToWorklist(Trunc01.getNode());
8135 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8136 }
8137 }
8138
8139 return SDValue();
8140}
8141
8142SDValue DAGCombiner::visitRotate(SDNode *N) {
8143 SDLoc dl(N);
8144 SDValue N0 = N->getOperand(0);
8145 SDValue N1 = N->getOperand(1);
8146 EVT VT = N->getValueType(0);
8147 unsigned Bitsize = VT.getScalarSizeInBits();
8148
8149 // fold (rot x, 0) -> x
8150 if (isNullOrNullSplat(N1))
8151 return N0;
8152
8153 // fold (rot x, c) -> x iff (c % BitSize) == 0
8154 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8155 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8156 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8157 return N0;
8158 }
8159
8160 // fold (rot x, c) -> (rot x, c % BitSize)
8161 bool OutOfRange = false;
8162 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8163 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8164 return true;
8165 };
8166 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8167 EVT AmtVT = N1.getValueType();
8168 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8169 if (SDValue Amt =
8170 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8171 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8172 }
8173
8174 // rot i16 X, 8 --> bswap X
8175 auto *RotAmtC = isConstOrConstSplat(N1);
8176 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8177 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8178 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8179
8180 // Simplify the operands using demanded-bits information.
8181 if (SimplifyDemandedBits(SDValue(N, 0)))
8182 return SDValue(N, 0);
8183
8184 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8185 if (N1.getOpcode() == ISD::TRUNCATE &&
8186 N1.getOperand(0).getOpcode() == ISD::AND) {
8187 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8188 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8189 }
8190
8191 unsigned NextOp = N0.getOpcode();
8192 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8193 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8194 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8195 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8196 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8197 EVT ShiftVT = C1->getValueType(0);
8198 bool SameSide = (N->getOpcode() == NextOp);
8199 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8200 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8201 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8202 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8203 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8204 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8205 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8206 CombinedShiftNorm);
8207 }
8208 }
8209 }
8210 return SDValue();
8211}
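// Illustrative sketch, not part of DAGCombiner.cpp: two of the rotate folds in
// visitRotate above, shown on a plain C++ rotl. An out-of-range amount can be
// reduced modulo the bit width, and an i16 rotate by 8 is a byte swap. The
// rotl16 helper is ad hoc, not an LLVM API:
#include <cassert>
#include <cstdint>

static uint16_t rotl16(uint16_t V, unsigned Amt) {
  Amt &= 15;
  return Amt == 0 ? V : (uint16_t)((V << Amt) | (V >> (16 - Amt)));
}

int main() {
  uint16_t X = 0xABCD;

  // fold (rot x, c) -> (rot x, c % BitSize)
  assert(rotl16(X, 21) == rotl16(X, 21 % 16));

  // rot i16 X, 8 --> bswap X
  uint16_t Swapped = (uint16_t)((X << 8) | (X >> 8));
  assert(rotl16(X, 8) == Swapped);
  return 0;
}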
8212
8213SDValue DAGCombiner::visitSHL(SDNode *N) {
8214 SDValue N0 = N->getOperand(0);
8215 SDValue N1 = N->getOperand(1);
8216 if (SDValue V = DAG.simplifyShift(N0, N1))
8217 return V;
8218
8219 EVT VT = N0.getValueType();
8220 EVT ShiftVT = N1.getValueType();
8221 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8222
8223 // fold vector ops
8224 if (VT.isVector()) {
8225 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8226 return FoldedVOp;
8227
8228 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8229 // If setcc produces all-one true value then:
8230 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8231 if (N1CV && N1CV->isConstant()) {
8232 if (N0.getOpcode() == ISD::AND) {
8233 SDValue N00 = N0->getOperand(0);
8234 SDValue N01 = N0->getOperand(1);
8235 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8236
8237 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8238 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8239 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8240 if (SDValue C =
8241 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8242 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8243 }
8244 }
8245 }
8246 }
8247
8248 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8249
8250 // fold (shl c1, c2) -> c1<<c2
8251 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8252 return C;
8253
8254 if (SDValue NewSel = foldBinOpIntoSelect(N))
8255 return NewSel;
8256
8257 // if (shl x, c) is known to be zero, return 0
8258 if (DAG.MaskedValueIsZero(SDValue(N, 0),
8259 APInt::getAllOnesValue(OpSizeInBits)))
8260 return DAG.getConstant(0, SDLoc(N), VT);
8261
8262 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8263 if (N1.getOpcode() == ISD::TRUNCATE &&
8264 N1.getOperand(0).getOpcode() == ISD::AND) {
8265 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8266 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8267 }
8268
8269 if (SimplifyDemandedBits(SDValue(N, 0)))
8270 return SDValue(N, 0);
8271
8272 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8273 if (N0.getOpcode() == ISD::SHL) {
8274 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8275 ConstantSDNode *RHS) {
8276 APInt c1 = LHS->getAPIntValue();
8277 APInt c2 = RHS->getAPIntValue();
8278 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8279 return (c1 + c2).uge(OpSizeInBits);
8280 };
8281 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8282 return DAG.getConstant(0, SDLoc(N), VT);
8283
8284 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8285 ConstantSDNode *RHS) {
8286 APInt c1 = LHS->getAPIntValue();
8287 APInt c2 = RHS->getAPIntValue();
8288 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8289 return (c1 + c2).ult(OpSizeInBits);
8290 };
8291 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8292 SDLoc DL(N);
8293 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8294 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8295 }
8296 }
8297
8298 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8299 // For this to be valid, the second form must not preserve any of the bits
8300 // that are shifted out by the inner shift in the first form. This means
8301 // the outer shift size must be >= the number of bits added by the ext.
8302 // As a corollary, we don't care what kind of ext it is.
8303 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8304 N0.getOpcode() == ISD::ANY_EXTEND ||
8305 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8306 N0.getOperand(0).getOpcode() == ISD::SHL) {
8307 SDValue N0Op0 = N0.getOperand(0);
8308 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8309 EVT InnerVT = N0Op0.getValueType();
8310 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8311
8312 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8313 ConstantSDNode *RHS) {
8314 APInt c1 = LHS->getAPIntValue();
8315 APInt c2 = RHS->getAPIntValue();
8316 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8317 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8318 (c1 + c2).uge(OpSizeInBits);
8319 };
8320 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8321 /*AllowUndefs*/ false,
8322 /*AllowTypeMismatch*/ true))
8323 return DAG.getConstant(0, SDLoc(N), VT);
8324
8325 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8326 ConstantSDNode *RHS) {
8327 APInt c1 = LHS->getAPIntValue();
8328 APInt c2 = RHS->getAPIntValue();
8329 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8330 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8331 (c1 + c2).ult(OpSizeInBits);
8332 };
8333 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8334 /*AllowUndefs*/ false,
8335 /*AllowTypeMismatch*/ true)) {
8336 SDLoc DL(N);
8337 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8338 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8339 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8340 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8341 }
8342 }
8343
8344 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8345 // Only fold this if the inner zext has no other uses to avoid increasing
8346 // the total number of instructions.
8347 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8348 N0.getOperand(0).getOpcode() == ISD::SRL) {
8349 SDValue N0Op0 = N0.getOperand(0);
8350 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8351
8352 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8353 APInt c1 = LHS->getAPIntValue();
8354 APInt c2 = RHS->getAPIntValue();
8355 zeroExtendToMatch(c1, c2);
8356 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8357 };
8358 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8359 /*AllowUndefs*/ false,
8360 /*AllowTypeMismatch*/ true)) {
8361 SDLoc DL(N);
8362 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8363 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8364 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8365 AddToWorklist(NewSHL.getNode());
8366 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8367 }
8368 }
8369
8370 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8371 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8372 // TODO - support non-uniform vector shift amounts.
8373 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8374 N0->getFlags().hasExact()) {
8375 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8376 uint64_t C1 = N0C1->getZExtValue();
8377 uint64_t C2 = N1C->getZExtValue();
8378 SDLoc DL(N);
8379 if (C1 <= C2)
8380 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8381 DAG.getConstant(C2 - C1, DL, ShiftVT));
8382 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8383 DAG.getConstant(C1 - C2, DL, ShiftVT));
8384 }
8385 }
8386
8387 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8388 // (and (srl x, (sub c1, c2)), MASK)
8389 // Only fold this if the inner shift has no other uses -- if it does, folding
8390 // this will increase the total number of instructions.
8391 // TODO - drop hasOneUse requirement if c1 == c2?
8392 // TODO - support non-uniform vector shift amounts.
8393 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8394 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8395 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8396 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8397 uint64_t c1 = N0C1->getZExtValue();
8398 uint64_t c2 = N1C->getZExtValue();
8399 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8400 SDValue Shift;
8401 if (c2 > c1) {
8402 Mask <<= c2 - c1;
8403 SDLoc DL(N);
8404 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8405 DAG.getConstant(c2 - c1, DL, ShiftVT));
8406 } else {
8407 Mask.lshrInPlace(c1 - c2);
8408 SDLoc DL(N);
8409 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8410 DAG.getConstant(c1 - c2, DL, ShiftVT));
8411 }
8412 SDLoc DL(N0);
8413 return DAG.getNode(ISD::AND, DL, VT, Shift,
8414 DAG.getConstant(Mask, DL, VT));
8415 }
8416 }
8417 }
8418
8419 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8420 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8421 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8422 SDLoc DL(N);
8423 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8424 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8425 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8426 }
8427
8428 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8429 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8430 // Variant of version done on multiply, except mul by a power of 2 is turned
8431 // into a shift.
8432 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8433 N0.getNode()->hasOneUse() &&
8434 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8435 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8436 TLI.isDesirableToCommuteWithShift(N, Level)) {
8437 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8438 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8439 AddToWorklist(Shl0.getNode());
8440 AddToWorklist(Shl1.getNode());
8441 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8442 }
8443
8444 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8445 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8446 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8447 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8448 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8449 if (isConstantOrConstantVector(Shl))
8450 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8451 }
8452
8453 if (N1C && !N1C->isOpaque())
8454 if (SDValue NewSHL = visitShiftByConstant(N))
8455 return NewSHL;
8456
8457 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8458 if (N0.getOpcode() == ISD::VSCALE)
8459 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8460 const APInt &C0 = N0.getConstantOperandAPInt(0);
8461 const APInt &C1 = NC1->getAPIntValue();
8462 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8463 }
8464
8465 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8466 APInt ShlVal;
8467 if (N0.getOpcode() == ISD::STEP_VECTOR)
8468 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8469 const APInt &C0 = N0.getConstantOperandAPInt(0);
8470 if (ShlVal.ult(C0.getBitWidth())) {
8471 APInt NewStep = C0 << ShlVal;
8472 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8473 }
8474 }
8475
8476 return SDValue();
8477}
8478
8479// Transform a right shift of a multiply into a multiply-high.
8480// Examples:
8481// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8482// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8483static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8484 const TargetLowering &TLI) {
8485 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8486 "SRL or SRA node is required here!");
8487
8488 // Check the shift amount. Proceed with the transformation if the shift
8489 // amount is constant.
8490 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8491 if (!ShiftAmtSrc)
8492 return SDValue();
8493
8494 SDLoc DL(N);
8495
8496 // The operation feeding into the shift must be a multiply.
8497 SDValue ShiftOperand = N->getOperand(0);
8498 if (ShiftOperand.getOpcode() != ISD::MUL)
8499 return SDValue();
8500
8501 // Both operands must be equivalent extend nodes.
8502 SDValue LeftOp = ShiftOperand.getOperand(0);
8503 SDValue RightOp = ShiftOperand.getOperand(1);
8504 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8505 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8506
8507 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8508 return SDValue();
8509
8510 EVT WideVT1 = LeftOp.getValueType();
8511 EVT WideVT2 = RightOp.getValueType();
8512 (void)WideVT2;
8513 // Proceed with the transformation if the wide types match.
8514 assert((WideVT1 == WideVT2) &&
8515 "Cannot have a multiply node with two different operand types.");
8516
8517 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8518 // Check that the two extend nodes are the same type.
8519 if (NarrowVT != RightOp.getOperand(0).getValueType())
8520 return SDValue();
8521
8522 // Proceed with the transformation if the wide type is twice as large
8523 // as the narrow type.
8524 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8525 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8526 return SDValue();
8527
8528 // Check the shift amount with the narrow type size.
8529 // Proceed with the transformation if the shift amount is the width
8530 // of the narrow type.
8531 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8532 if (ShiftAmt != NarrowVTSize)
8533 return SDValue();
8534
8535 // If the operation feeding into the MUL is a sign extend (sext),
8536 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8537 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8538
8539 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8540 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8541 return SDValue();
8542
8543 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8544 RightOp.getOperand(0));
8545 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8546 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8547}
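// Illustrative sketch, not part of DAGCombiner.cpp: the equivalence that
// combineShiftToMULH above relies on. Widening, multiplying and shifting right
// by the narrow bit width yields exactly the high half of the product, which
// is what a single MULHU (or MULHS for the sign-extended form) computes:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEF, B = 0x12345678;
  uint64_t Wide = (uint64_t)A * (uint64_t)B; // mul (zext a), (zext b)
  uint32_t High = (uint32_t)(Wide >> 32);    // srl ..., 32  ==  MULHU(a, b)
  uint32_t Low = (uint32_t)Wide;             // the ordinary narrow MUL result
  // High and Low together are the whole product, so the srl of the widened
  // multiply really is "just the high half" and one narrow MULHU produces it.
  assert((((uint64_t)High << 32) | Low) == Wide);
  return 0;
}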
8548
8549SDValue DAGCombiner::visitSRA(SDNode *N) {
8550 SDValue N0 = N->getOperand(0);
8551 SDValue N1 = N->getOperand(1);
8552 if (SDValue V = DAG.simplifyShift(N0, N1))
8553 return V;
8554
8555 EVT VT = N0.getValueType();
8556 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8557
8558 // Arithmetic shifting an all-sign-bit value is a no-op.
8559 // fold (sra 0, x) -> 0
8560 // fold (sra -1, x) -> -1
8561 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8562 return N0;
8563
8564 // fold vector ops
8565 if (VT.isVector())
8566 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8567 return FoldedVOp;
8568
8569 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8570
8571 // fold (sra c1, c2) -> c1>>c2
8572 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8573 return C;
8574
8575 if (SDValue NewSel = foldBinOpIntoSelect(N))
8576 return NewSel;
8577
8578 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target supports
8579 // sext_inreg.
8580 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8581 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8582 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8583 if (VT.isVector())
8584 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8585 VT.getVectorElementCount());
8586 if (!LegalOperations ||
8587 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8588 TargetLowering::Legal)
8589 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8590 N0.getOperand(0), DAG.getValueType(ExtVT));
8591 // Even if we can't convert to sext_inreg, we might be able to remove
8592 // this shift pair if the input is already sign extended.
8593 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8594 return N0.getOperand(0);
8595 }
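// [Editorial sketch] Example of the two folds above on i32 with c1 = 24:
//   (sra (shl x, 24), 24) --> (sign_extend_inreg x, i8)   // LowBits = 8
// and, even without sext_inreg support, if x already has more than 24 sign
// bits the shift pair is a no-op and x is returned directly.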
8596
8597 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8598 // clamp (add c1, c2) to max shift.
8599 if (N0.getOpcode() == ISD::SRA) {
8600 SDLoc DL(N);
8601 EVT ShiftVT = N1.getValueType();
8602 EVT ShiftSVT = ShiftVT.getScalarType();
8603 SmallVector<SDValue, 16> ShiftValues;
8604
8605 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8606 APInt c1 = LHS->getAPIntValue();
8607 APInt c2 = RHS->getAPIntValue();
8608 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8609 APInt Sum = c1 + c2;
8610 unsigned ShiftSum =
8611 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8612 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8613 return true;
8614 };
8615 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8616 SDValue ShiftValue;
8617 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8618 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8619 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8620 assert(ShiftValues.size() == 1 &&
8621 "Expected matchBinaryPredicate to return one element for "
8622 "SPLAT_VECTORs");
8623 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8624 } else
8625 ShiftValue = ShiftValues[0];
8626 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8627 }
8628 }
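// [Editorial sketch] Example of the sra+sra fold above on i32:
//   (sra (sra x, 3), 5)   --> (sra x, 8)
//   (sra (sra x, 20), 20) --> (sra x, 31)   // 40 >= 32, clamped to OpSizeInBits-1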
8629
8630 // fold (sra (shl X, m), (sub result_size, n))
8631 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8632 // result_size - n != m.
8633 // If truncate is free for the target sext(shl) is likely to result in better
8634 // code.
8635 if (N0.getOpcode() == ISD::SHL && N1C) {
8636 // Get the two constants of the shifts, CN0 = m and CN = n.
8637 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8638 if (N01C) {
8639 LLVMContext &Ctx = *DAG.getContext();
8640 // Determine what the truncate's result bitsize and type would be.
8641 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8642
8643 if (VT.isVector())
8644 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8645
8646 // Determine the residual right-shift amount.
8647 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8648
8649 // If the shift is not a no-op (in which case this should be just a sign
8650 // extend already), the truncated-to type is legal, sign_extend is legal
8651 // on that type, and the truncate to that type is both legal and free,
8652 // perform the transform.
8653 if ((ShiftAmt > 0) &&
8654 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8655 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8656 TLI.isTruncateFree(VT, TruncVT)) {
8657 SDLoc DL(N);
8658 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8659 getShiftAmountTy(N0.getOperand(0).getValueType()));
8660 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8661 N0.getOperand(0), Amt);
8662 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8663 Shift);
8664 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8665 N->getValueType(0), Trunc);
8666 }
8667 }
8668 }
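// [Editorial sketch] Example of the shl/sra-to-sext fold above on i32, with an
// inner shl of 8 and an outer sra of 24 (so TruncVT = i8 and ShiftAmt = 16):
//   (sra (shl X, 8), 24) --> (sign_extend (trunc (srl X, 16) to i8) to i32)
// i.e. bits [16..23] of X are extracted and sign-extended, assuming truncation
// to i8 is free and SIGN_EXTEND/TRUNCATE are legal or custom for the target.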
8669
8670 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8671 // sra (add (shl X, N1C), AddC), N1C -->
8672 // sext (add (trunc X to (width - N1C)), AddC')
8673 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8674 N0.getOperand(0).getOpcode() == ISD::SHL &&
8675 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8676 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8677 SDValue Shl = N0.getOperand(0);
8678 // Determine what the truncate's type would be and ask the target if that
8679 // is a free operation.
8680 LLVMContext &Ctx = *DAG.getContext();
8681 unsigned ShiftAmt = N1C->getZExtValue();
8682 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8683 if (VT.isVector())
8684 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8685
8686 // TODO: The simple type check probably belongs in the default hook
8687 // implementation and/or target-specific overrides (because
8688 // non-simple types likely require masking when legalized), but that
8689 // restriction may conflict with other transforms.
8690 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8691 TLI.isTruncateFree(VT, TruncVT)) {
8692 SDLoc DL(N);
8693 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8694 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8695 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8696 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8697 return DAG.getSExtOrTrunc(Add, DL, VT);
8698 }
8699 }
8700 }
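// [Editorial sketch] Example of the add(shl)/sra fold above on i32 with
// N1C = 16 (TruncVT = i16) and a hypothetical AddC of 0x12340000:
//   (sra (add (shl X, 16), 0x12340000), 16)
//     --> (sext (add (trunc X to i16), 0x1234) to i32)
// The low 16 bits of AddC cannot carry into the kept half because (shl X, 16)
// has zero low bits, so only AddC >> 16 survives the truncation.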
8701
8702 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8703 if (N1.getOpcode() == ISD::TRUNCATE &&
8704 N1.getOperand(0).getOpcode() == ISD::AND) {
8705 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8706 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8707 }
8708
8709 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8710 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8711 // if c1 is equal to the number of bits the trunc removes
8712 // TODO - support non-uniform vector shift amounts.
8713 if (N0.getOpcode() == ISD::TRUNCATE &&
8714 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8715 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8716 N0.getOperand(0).hasOneUse() &&
8717 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8718 SDValue N0Op0 = N0.getOperand(0);
8719 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8720 EVT LargeVT = N0Op0.getValueType();
8721 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8722 if (LargeShift->getAPIntValue() == TruncBits) {
8723 SDLoc DL(N);
8724 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8725 getShiftAmountTy(LargeVT));
8726 SDValue SRA =
8727 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8728 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8729 }
8730 }
8731 }
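// [Editorial sketch] Example of the trunc(shift) fold above, with x of type
// i64 truncated to i32 (TruncBits = 32), c1 = 32 and c2 = 5:
//   (sra (trunc (srl x, 32) to i32), 5) --> (trunc (sra x, 37) to i32)
// The inner shift amount must exactly equal the number of truncated bits for
// this to be valid; the inner shift may be either SRL or SRA.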
8732
8733 // Simplify, based on bits shifted out of the LHS.
8734 if (SimplifyDemandedBits(SDValue(N, 0)))
8735 return SDValue(N, 0);
8736
8737 // If the sign bit is known to be zero, switch this to a SRL.
8738 if (DAG.SignBitIsZero(N0))
8739 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8740
8741 if (N1C && !N1C->isOpaque())
8742 if (SDValue NewSRA = visitShiftByConstant(N))
8743 return NewSRA;
8744
8745 // Try to transform this shift into a multiply-high if
8746 // it matches the appropriate pattern detected in combineShiftToMULH.
8747 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8748 return MULH;
8749
8750 return SDValue();
8751}
8752
8753SDValue DAGCombiner::visitSRL(SDNode *N) {
8754 SDValue N0 = N->getOperand(0);
8755 SDValue N1 = N->getOperand(1);
8756 if (SDValue V = DAG.simplifyShift(N0, N1))
8757 return V;
8758
8759 EVT VT = N0.getValueType();
8760 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8761
8762 // fold vector ops
8763 if (VT.isVector())
8764 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8765 return FoldedVOp;
8766
8767 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8768
8769 // fold (srl c1, c2) -> c1 >>u c2
8770 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8771 return C;
8772
8773 if (SDValue NewSel = foldBinOpIntoSelect(N))
8774 return NewSel;
8775
8776 // if (srl x, c) is known to be zero, return 0
8777 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8778 APInt::getAllOnesValue(OpSizeInBits)))
8779 return DAG.getConstant(0, SDLoc(N), VT);
8780
8781 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8782 if (N0.getOpcode() == ISD::SRL) {
8783 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8784 ConstantSDNode *RHS) {
8785 APInt c1 = LHS->getAPIntValue();
8786 APInt c2 = RHS->getAPIntValue();
8787 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8788 return (c1 + c2).uge(OpSizeInBits);
8789 };
8790 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8791 return DAG.getConstant(0, SDLoc(N), VT);
8792
8793 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8794 ConstantSDNode *RHS) {
8795 APInt c1 = LHS->getAPIntValue();
8796 APInt c2 = RHS->getAPIntValue();
8797 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8798 return (c1 + c2).ult(OpSizeInBits);
8799 };
8800 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8801 SDLoc DL(N);
8802 EVT ShiftVT = N1.getValueType();
8803 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8804 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8805 }
8806 }
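// [Editorial sketch] Example of the srl+srl fold above on i32:
//   (srl (srl x, 10), 15) --> (srl x, 25)
//   (srl (srl x, 20), 20) --> 0            // 40 >= 32, all bits shifted out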
8807
8808 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8809 N0.getOperand(0).getOpcode() == ISD::SRL) {
8810 SDValue InnerShift = N0.getOperand(0);
8811 // TODO - support non-uniform vector shift amounts.
8812 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8813 uint64_t c1 = N001C->getZExtValue();
8814 uint64_t c2 = N1C->getZExtValue();
8815 EVT InnerShiftVT = InnerShift.getValueType();
8816 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8817 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8818 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8819 // This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
8820 if (c1 + OpSizeInBits == InnerShiftSize) {
8821 SDLoc DL(N);
8822 if (c1 + c2 >= InnerShiftSize)
8823 return DAG.getConstant(0, DL, VT);
8824 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8825 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8826 InnerShift.getOperand(0), NewShiftAmt);
8827 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8828 }
8829 // In the more general case, we can clear the high bits after the shift:
8830 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8831 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8832 c1 + c2 < InnerShiftSize) {
8833 SDLoc DL(N);
8834 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8835 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8836 InnerShift.getOperand(0), NewShiftAmt);
8837 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8838 OpSizeInBits - c2),
8839 DL, InnerShiftVT);
8840 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8841 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8842 }
8843 }
8844 }
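// [Editorial sketch] Example of the trunc(srl) folds above, with x of type i64
// truncated to i32:
//   c1 = 32, c2 = 8:  (srl (trunc (srl x, 32) to i32), 8)
//                       --> (trunc (srl x, 40) to i32)
//   c1 = 16, c2 = 8:  (srl (trunc (srl x, 16) to i32), 8)
//                       --> (trunc (and (srl x, 24), 0xFFFFFF) to i32)
// where the mask keeps the low OpSizeInBits - c2 = 24 bits (one-use case only).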
8845
8846 // fold (srl (shl x, c), c) -> (and x, cst2)
8847 // TODO - (srl (shl x, c1), c2).
8848 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8849 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8850 SDLoc DL(N);
8851 SDValue Mask =
8852 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8853 AddToWorklist(Mask.getNode());
8854 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8855 }
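// [Editorial sketch] Example of the shl/srl-to-and fold above on i32 with
// c = 24: (srl (shl x, 24), 24) --> (and x, 0xFF), since -1 >>u 24 == 0xFF.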
8856
8857 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8858 // TODO - support non-uniform vector shift amounts.
8859 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8860 // Shifting in all undef bits?
8861 EVT SmallVT = N0.getOperand(0).getValueType();
8862 unsigned BitSize = SmallVT.getScalarSizeInBits();
8863 if (N1C->getAPIntValue().uge(BitSize))
8864 return DAG.getUNDEF(VT);
8865
8866 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8867 uint64_t ShiftAmt = N1C->getZExtValue();
8868 SDLoc DL0(N0);
8869 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8870 N0.getOperand(0),
8871 DAG.getConstant(ShiftAmt, DL0,
8872 getShiftAmountTy(SmallVT)));
8873 AddToWorklist(SmallShift.getNode());
8874 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8875 SDLoc DL(N);
8876 return DAG.getNode(ISD::AND, DL, VT,
8877 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8878 DAG.getConstant(Mask, DL, VT));
8879 }
8880 }
8881
8882 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8883 // bit, which is unmodified by sra.
8884 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8885 if (N0.getOpcode() == ISD::SRA)
8886 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8887 }
8888
8889 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8890 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8891 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8892 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8893
8894 // If any of the input bits are KnownOne, then the input couldn't be all
8895 // zeros, thus the result of the srl will always be zero.
8896 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8897
8898 // If all of the bits input to the ctlz node are known to be zero, then
8899 // the result of the ctlz is "32" and the result of the shift is one.
8900 APInt UnknownBits = ~Known.Zero;
8901 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8902
8903 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8904 if (UnknownBits.isPowerOf2()) {
8905 // Okay, we know that only the single bit specified by UnknownBits
8906 // could be set on input to the CTLZ node. If this bit is set, the SRL
8907 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8908 // to an SRL/XOR pair, which is likely to simplify more.
8909 unsigned ShAmt = UnknownBits.countTrailingZeros();
8910 SDValue Op = N0.getOperand(0);
8911
8912 if (ShAmt) {
8913 SDLoc DL(N0);
8914 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8915 DAG.getConstant(ShAmt, DL,
8916 getShiftAmountTy(Op.getValueType())));
8917 AddToWorklist(Op.getNode());
8918 }
8919
8920 SDLoc DL(N);
8921 return DAG.getNode(ISD::XOR, DL, VT,
8922 Op, DAG.getConstant(1, DL, VT));
8923 }
8924 }
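// [Editorial sketch] Example of the ctlz/srl fold above on i32 (shift by 5):
// if known-bits analysis shows only bit 3 of the ctlz input can be set, then
//   (srl (ctlz x), 5) --> (xor (srl x, 3), 1)
// which yields 1 when x == 0 and 0 otherwise, matching the original expression.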
8925
8926 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8927 if (N1.getOpcode() == ISD::TRUNCATE &&
8928 N1.getOperand(0).getOpcode() == ISD::AND) {
8929 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8930 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8931 }
8932
8933 // fold operands of srl based on knowledge that the low bits are not
8934 // demanded.
8935 if (SimplifyDemandedBits(SDValue(N, 0)))
8936 return SDValue(N, 0);
8937
8938 if (N1C && !N1C->isOpaque())
8939 if (SDValue NewSRL = visitShiftByConstant(N))
8940 return NewSRL;
8941
8942 // Attempt to convert a srl of a load into a narrower zero-extending load.
8943 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8944 return NarrowLoad;
8945
8946 // Here is a common situation. We want to optimize:
8947 //
8948 // %a = ...
8949 // %b = and i32 %a, 2
8950 // %c = srl i32 %b, 1
8951 // brcond i32 %c ...
8952 //
8953 // into
8954 //
8955 // %a = ...
8956 // %b = and %a, 2
8957 // %c = setcc eq %b, 0
8958 // brcond %c ...
8959 //
8960 // However, after the source operand of the SRL is optimized into an AND, the SRL
8961 // itself may not be optimized further. Look for it and add the BRCOND to
8962 // the worklist.
8963 if (N->hasOneUse()) {
8964 SDNode *Use = *N->use_begin();
8965 if (Use->getOpcode() == ISD::BRCOND)
8966 AddToWorklist(Use);
8967 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8968 // Also look past the truncate.
8969 Use = *Use->use_begin();
8970 if (Use->getOpcode() == ISD::BRCOND)
8971 AddToWorklist(Use);
8972 }
8973 }
8974
8975 // Try to transform this shift into a multiply-high if
8976 // it matches the appropriate pattern detected in combineShiftToMULH.
8977 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8978 return MULH;
8979
8980 return SDValue();
8981}
8982
8983SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8984 EVT VT = N->getValueType(0);
8985 SDValue N0 = N->getOperand(0);
8986 SDValue N1 = N->getOperand(1);
8987 SDValue N2 = N->getOperand(2);
8988 bool IsFSHL = N->getOpcode() == ISD::FSHL;
8989 unsigned BitWidth = VT.getScalarSizeInBits();
8990
8991 // fold (fshl N0, N1, 0) -> N0
8992 // fold (fshr N0, N1, 0) -> N1
8993 if (isPowerOf2_32(BitWidth))
8994 if (DAG.MaskedValueIsZero(
8995 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8996 return IsFSHL ? N0 : N1;
8997
8998 auto IsUndefOrZero = [](SDValue V) {
8999 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9000 };
9001
9002 // TODO - support non-uniform vector shift amounts.
9003 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9004 EVT ShAmtTy = N2.getValueType();
9005
9006 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9007 if (Cst->getAPIntValue().uge(BitWidth)) {
9008 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9009 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9010 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9011 }
9012
9013 unsigned ShAmt = Cst->getZExtValue();
9014 if (ShAmt == 0)
9015 return IsFSHL ? N0 : N1;
9016
9017 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9018 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9019 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9020 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9021 if (IsUndefOrZero(N0))
9022 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9023 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9024 SDLoc(N), ShAmtTy));
9025 if (IsUndefOrZero(N1))
9026 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9027 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9028 SDLoc(N), ShAmtTy));
9029
9030 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9031 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9032 // TODO - bigendian support once we have test coverage.
9033 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9034 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9035 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9036 !DAG.getDataLayout().isBigEndian()) {
9037 auto *LHS = dyn_cast<LoadSDNode>(N0);
9038 auto *RHS = dyn_cast<LoadSDNode>(N1);
9039 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9040 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9041 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9042 ISD::isNON_EXTLoad(LHS)) {
9043 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9044 SDLoc DL(RHS);
9045 uint64_t PtrOff =
9046 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9047 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9048 bool Fast = false;
9049 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9050 RHS->getAddressSpace(), NewAlign,
9051 RHS->getMemOperand()->getFlags(), &Fast) &&
9052 Fast) {
9053 SDValue NewPtr = DAG.getMemBasePlusOffset(
9054 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9055 AddToWorklist(NewPtr.getNode());
9056 SDValue Load = DAG.getLoad(
9057 VT, DL, RHS->getChain(), NewPtr,
9058 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9059 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9060 // Replace the old load's chain with the new load's chain.
9061 WorklistRemover DeadNodes(*this);
9062 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9063 return Load;
9064 }
9065 }
9066 }
9067 }
9068 }
9069
9070 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9071 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9072 // iff we know the shift amount is in range.
9073 // TODO: when is it worth doing SUB(BW, N2) as well?
9074 if (isPowerOf2_32(BitWidth)) {
9075 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9076 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9077 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9078 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9079 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9080 }
9081
9082 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9083 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9084 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
9085 // is legal as well, we might be better off avoiding non-constant (BW - N2).
9086 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9087 if (N0 == N1 && hasOperation(RotOpc, VT))
9088 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9089
9090 // Simplify, based on bits shifted out of N0/N1.
9091 if (SimplifyDemandedBits(SDValue(N, 0)))
9092 return SDValue(N, 0);
9093
9094 return SDValue();
9095}
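// [Editorial sketch] A few concrete instances of the funnel-shift folds above,
// on hypothetical i32 operands:
//   (fshl x, y, 37) --> (fshl x, y, 5)   // amount reduced modulo BitWidth
//   (fshl x, 0, 5)  --> (shl x, 5)       // undef-or-zero low half
//   (fshr 0, y, 5)  --> (srl y, 5)       // undef-or-zero high half
//   (fshl x, x, n)  --> (rotl x, n)      // when ROTL is available on the target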
9096
9097// Given an ABS node, detect the following pattern:
9098// (ABS (SUB (EXTEND a), (EXTEND b))).
9099// Generates UABD/SABD instruction.
9100static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9101 const TargetLowering &TLI) {
9102 SDValue AbsOp1 = N->getOperand(0);
9103 SDValue Op0, Op1;
9104
9105 if (AbsOp1.getOpcode() != ISD::SUB)
9106 return SDValue();
9107
9108 Op0 = AbsOp1.getOperand(0);
9109 Op1 = AbsOp1.getOperand(1);
9110
9111 unsigned Opc0 = Op0.getOpcode();
9112 // Check if the operands of the sub are (zero|sign)-extended.
9113 if (Opc0 != Op1.getOpcode() ||
9114 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9115 return SDValue();
9116
9117 EVT VT1 = Op0.getOperand(0).getValueType();
9118 EVT VT2 = Op1.getOperand(0).getValueType();
9119 // Check if the operands are of same type and valid size.
9120 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9121 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9122 return SDValue();
9123
9124 Op0 = Op0.getOperand(0);
9125 Op1 = Op1.getOperand(0);
9126 SDValue ABD =
9127 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9128 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9129}
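// [Editorial sketch] Example of the ABS-to-ABD combine above, with hypothetical
// i8 inputs widened to i32:
//   (abs (sub (zext i8 %a to i32), (zext i8 %b to i32)))
//     --> (zext (abdu i8 %a, %b) to i32)
// Sign-extended operands select ABDS instead; the result is still zero-extended
// because the absolute difference always fits in the narrow type.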
9130
9131SDValue DAGCombiner::visitABS(SDNode *N) {
9132 SDValue N0 = N->getOperand(0);
9133 EVT VT = N->getValueType(0);
9134
9135 // fold (abs c1) -> c2
9136 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9137 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9138 // fold (abs (abs x)) -> (abs x)
9139 if (N0.getOpcode() == ISD::ABS)
9140 return N0;
9141 // fold (abs x) -> x iff not-negative
9142 if (DAG.SignBitIsZero(N0))
9143 return N0;
9144
9145 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9146 return ABD;
9147
9148 return SDValue();
9149}
9150
9151SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9152 SDValue N0 = N->getOperand(0);
9153 EVT VT = N->getValueType(0);
9154
9155 // fold (bswap c1) -> c2
9156 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9157 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9158 // fold (bswap (bswap x)) -> x
9159 if (N0.getOpcode() == ISD::BSWAP)
9160 return N0->getOperand(0);
9161 return SDValue();
9162}
9163
9164SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9165 SDValue N0 = N->getOperand(0);
9166 EVT VT = N->getValueType(0);
9167
9168 // fold (bitreverse c1) -> c2
9169 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9170 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9171 // fold (bitreverse (bitreverse x)) -> x
9172 if (N0.getOpcode() == ISD::BITREVERSE)
9173 return N0.getOperand(0);
9174 return SDValue();
9175}
9176
9177SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9178 SDValue N0 = N->getOperand(0);
9179 EVT VT = N->getValueType(0);
9180
9181 // fold (ctlz c1) -> c2
9182 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9183 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9184
9185 // If the value is known never to be zero, switch to the undef version.
9186 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9187 if (DAG.isKnownNeverZero(N0))
9188 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9189 }
9190
9191 return SDValue();
9192}
9193
9194SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9195 SDValue N0 = N->getOperand(0);
9196 EVT VT = N->getValueType(0);
9197
9198 // fold (ctlz_zero_undef c1) -> c2
9199 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9200 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9201 return SDValue();
9202}
9203
9204SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9205 SDValue N0 = N->getOperand(0);
9206 EVT VT = N->getValueType(0);
9207
9208 // fold (cttz c1) -> c2
9209 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9210 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9211
9212 // If the value is known never to be zero, switch to the undef version.
9213 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9214 if (DAG.isKnownNeverZero(N0))
9215 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9216 }
9217
9218 return SDValue();
9219}
9220
9221SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9222 SDValue N0 = N->getOperand(0);
9223 EVT VT = N->getValueType(0);
9224
9225 // fold (cttz_zero_undef c1) -> c2
9226 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9227 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9228 return SDValue();
9229}
9230
9231SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9232 SDValue N0 = N->getOperand(0);
9233 EVT VT = N->getValueType(0);
9234
9235 // fold (ctpop c1) -> c2
9236 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9237 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9238 return SDValue();
9239}
9240
9241// FIXME: This should be checking for no signed zeros on individual operands, as
9242// well as no nans.
9243static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9244 SDValue RHS,
9245 const TargetLowering &TLI) {
9246 const TargetOptions &Options = DAG.getTarget().Options;
9247 EVT VT = LHS.getValueType();
9248
9249 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9250 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9251 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9252}
9253
9254/// Generate Min/Max node
9255static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9256 SDValue RHS, SDValue True, SDValue False,
9257 ISD::CondCode CC, const TargetLowering &TLI,
9258 SelectionDAG &DAG) {
9259 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9260 return SDValue();
9261
9262 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9263 switch (CC) {
9264 case ISD::SETOLT:
9265 case ISD::SETOLE:
9266 case ISD::SETLT:
9267 case ISD::SETLE:
9268 case ISD::SETULT:
9269 case ISD::SETULE: {
9270 // Since the operands are known never to be NaN to get here, either fminnum or
9271 // fminnum_ieee is OK. Try the ieee version first, since fminnum is
9272 // expanded in terms of it.
9273 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9274 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9275 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9276
9277 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9278 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9279 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9280 return SDValue();
9281 }
9282 case ISD::SETOGT:
9283 case ISD::SETOGE:
9284 case ISD::SETGT:
9285 case ISD::SETGE:
9286 case ISD::SETUGT:
9287 case ISD::SETUGE: {
9288 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9289 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9290 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9291
9292 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9293 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9294 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9295 return SDValue();
9296 }
9297 default:
9298 return SDValue();
9299 }
9300}
9301
9302/// If a (v)select has a condition value that is a sign-bit test, try to smear
9303/// the condition operand sign-bit across the value width and use it as a mask.
9304static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9305 SDValue Cond = N->getOperand(0);
9306 SDValue C1 = N->getOperand(1);
9307 SDValue C2 = N->getOperand(2);
9308 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9309 return SDValue();
9310
9311 EVT VT = N->getValueType(0);
9312 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9313 VT != Cond.getOperand(0).getValueType())
9314 return SDValue();
9315
9316 // The inverted-condition + commuted-select variants of these patterns are
9317 // canonicalized to these forms in IR.
9318 SDValue X = Cond.getOperand(0);
9319 SDValue CondC = Cond.getOperand(1);
9320 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9321 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9322 isAllOnesOrAllOnesSplat(C2)) {
9323 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9324 SDLoc DL(N);
9325 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9326 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9327 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9328 }
9329 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9330 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9331 SDLoc DL(N);
9332 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9333 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9334 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9335 }
9336 return SDValue();
9337}
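// [Editorial sketch] Numeric check of the sign-bit-smear folds above, for i8:
//   X = -5: (X >>s 7) = 0xFF, so (X >>s 7) | C1 == -1 and (X >>s 7) & C1 == C1
//   X =  5: (X >>s 7) = 0x00, so (X >>s 7) | C1 == C1 and (X >>s 7) & C1 == 0
// matching "X > -1 ? C1 : -1" and "X < 0 ? C1 : 0" respectively.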
9338
9339SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9340 SDValue Cond = N->getOperand(0);
9341 SDValue N1 = N->getOperand(1);
9342 SDValue N2 = N->getOperand(2);
9343 EVT VT = N->getValueType(0);
9344 EVT CondVT = Cond.getValueType();
9345 SDLoc DL(N);
9346
9347 if (!VT.isInteger())
9348 return SDValue();
9349
9350 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9351 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9352 if (!C1 || !C2)
9353 return SDValue();
9354
9355 // Only do this before legalization to avoid conflicting with target-specific
9356 // transforms in the other direction (create a select from a zext/sext). There
9357 // is also a target-independent combine here in DAGCombiner in the other
9358 // direction for (select Cond, -1, 0) when the condition is not i1.
9359 if (CondVT == MVT::i1 && !LegalOperations) {
9360 if (C1->isNullValue() && C2->isOne()) {
9361 // select Cond, 0, 1 --> zext (!Cond)
9362 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9363 if (VT != MVT::i1)
9364 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9365 return NotCond;
9366 }
9367 if (C1->isNullValue() && C2->isAllOnesValue()) {
9368 // select Cond, 0, -1 --> sext (!Cond)
9369 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9370 if (VT != MVT::i1)
9371 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9372 return NotCond;
9373 }
9374 if (C1->isOne() && C2->isNullValue()) {
9375 // select Cond, 1, 0 --> zext (Cond)
9376 if (VT != MVT::i1)
9377 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9378 return Cond;
9379 }
9380 if (C1->isAllOnesValue() && C2->isNullValue()) {
9381 // select Cond, -1, 0 --> sext (Cond)
9382 if (VT != MVT::i1)
9383 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9384 return Cond;
9385 }
9386
9387 // Use a target hook because some targets may prefer to transform in the
9388 // other direction.
9389 if (TLI.convertSelectOfConstantsToMath(VT)) {
9390 // For any constants that differ by 1, we can transform the select into an
9391 // extend and add.
9392 const APInt &C1Val = C1->getAPIntValue();
9393 const APInt &C2Val = C2->getAPIntValue();
9394 if (C1Val - 1 == C2Val) {
9395 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9396 if (VT != MVT::i1)
9397 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9398 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9399 }
9400 if (C1Val + 1 == C2Val) {
9401 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9402 if (VT != MVT::i1)
9403 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9404 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9405 }
9406
9407 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9408 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9409 if (VT != MVT::i1)
9410 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9411 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9412 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9413 }
9414
9415 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9416 return V;
9417 }
9418
9419 return SDValue();
9420 }
9421
9422 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9423 // We can't do this reliably if integer based booleans have different contents
9424 // to floating point based booleans. This is because we can't tell whether we
9425 // have an integer-based boolean or a floating-point-based boolean unless we
9426 // can find the SETCC that produced it and inspect its operands. This is
9427 // fairly easy if C is the SETCC node, but it can potentially be
9428 // undiscoverable (or not reasonably discoverable). For example, it could be
9429 // in another basic block or it could require searching a complicated
9430 // expression.
9431 if (CondVT.isInteger() &&
9432 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9433 TargetLowering::ZeroOrOneBooleanContent &&
9434 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9435 TargetLowering::ZeroOrOneBooleanContent &&
9436 C1->isNullValue() && C2->isOne()) {
9437 SDValue NotCond =
9438 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9439 if (VT.bitsEq(CondVT))
9440 return NotCond;
9441 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9442 }
9443
9444 return SDValue();
9445}
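// [Editorial sketch] Concrete instances of the select-of-constants folds above
// (i1 condition, pre-legalization, hypothetical i32 constants):
//   select Cond, 0, 1 --> zext (not Cond)
//   select Cond, 7, 6 --> add (zext Cond), 6    // constants differ by 1
//   select Cond, 8, 0 --> shl (zext Cond), 3    // power of 2 vs. zero
// The last two additionally require the target to opt in via
// convertSelectOfConstantsToMath().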
9446
9447static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9448 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9449 "Expected a (v)select");
9450 SDValue Cond = N->getOperand(0);
9451 SDValue T = N->getOperand(1), F = N->getOperand(2);
9452 EVT VT = N->getValueType(0);
9453 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9454 return SDValue();
9455
9456 // select Cond, Cond, F --> or Cond, F
9457 // select Cond, 1, F --> or Cond, F
9458 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9459 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9460
9461 // select Cond, T, Cond --> and Cond, T
9462 // select Cond, T, 0 --> and Cond, T
9463 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9464 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9465
9466 // select Cond, T, 1 --> or (not Cond), T
9467 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9468 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9469 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9470 }
9471
9472 // select Cond, 0, F --> and (not Cond), F
9473 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9474 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9475 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9476 }
9477
9478 return SDValue();
9479}
9480
9481SDValue DAGCombiner::visitSELECT(SDNode *N) {
9482 SDValue N0 = N->getOperand(0);
9483 SDValue N1 = N->getOperand(1);
9484 SDValue N2 = N->getOperand(2);
9485 EVT VT = N->getValueType(0);
9486 EVT VT0 = N0.getValueType();
9487 SDLoc DL(N);
9488 SDNodeFlags Flags = N->getFlags();
9489
9490 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9491 return V;
9492
9493 if (SDValue V = foldSelectOfConstants(N))
9494 return V;
9495
9496 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9497 return V;
9498
9499 // If we can fold this based on the true/false value, do so.
9500 if (SimplifySelectOps(N, N1, N2))
9501 return SDValue(N, 0); // Don't revisit N.
9502
9503 if (VT0 == MVT::i1) {
9504 // The code in this block deals with the following 2 equivalences:
9505 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9506 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9507 // The target can specify its preferred form with the
9508 // shouldNormalizeToSelectSequence() callback. However, we always transform
9509 // to the right-hand form if the inner select already exists in the DAG,
9510 // and we always transform to the left side if we know that we can further
9511 // optimize the combination of the conditions.
9512 bool normalizeToSequence =
9513 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9514 // select (and Cond0, Cond1), X, Y
9515 // -> select Cond0, (select Cond1, X, Y), Y
9516 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9517 SDValue Cond0 = N0->getOperand(0);
9518 SDValue Cond1 = N0->getOperand(1);
9519 SDValue InnerSelect =
9520 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9521 if (normalizeToSequence || !InnerSelect.use_empty())
9522 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9523 InnerSelect, N2, Flags);
9524 // Cleanup on failure.
9525 if (InnerSelect.use_empty())
9526 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9527 }
9528 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9529 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9530 SDValue Cond0 = N0->getOperand(0);
9531 SDValue Cond1 = N0->getOperand(1);
9532 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9533 Cond1, N1, N2, Flags);
9534 if (normalizeToSequence || !InnerSelect.use_empty())
9535 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9536 InnerSelect, Flags);
9537 // Cleanup on failure.
9538 if (InnerSelect.use_empty())
9539 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9540 }
9541
9542 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9543 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9544 SDValue N1_0 = N1->getOperand(0);
9545 SDValue N1_1 = N1->getOperand(1);
9546 SDValue N1_2 = N1->getOperand(2);
9547 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9548 // Create the actual and node if we can generate good code for it.
9549 if (!normalizeToSequence) {
9550 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9551 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9552 N2, Flags);
9553 }
9554 // Otherwise see if we can optimize the "and" to a better pattern.
9555 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9556 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9557 N2, Flags);
9558 }
9559 }
9560 }
9561 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9562 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9563 SDValue N2_0 = N2->getOperand(0);
9564 SDValue N2_1 = N2->getOperand(1);
9565 SDValue N2_2 = N2->getOperand(2);
9566 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9567 // Create the actual or node if we can generate good code for it.
9568 if (!normalizeToSequence) {
9569 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9570 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9571 N2_2, Flags);
9572 }
9573 // Otherwise see if we can optimize to a better pattern.
9574 if (SDValue Combined = visitORLike(N0, N2_0, N))
9575 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9576 N2_2, Flags);
9577 }
9578 }
9579 }
9580
9581 // select (not Cond), N1, N2 -> select Cond, N2, N1
9582 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9583 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9584 SelectOp->setFlags(Flags);
9585 return SelectOp;
9586 }
9587
9588 // Fold selects based on a setcc into other things, such as min/max/abs.
9589 if (N0.getOpcode() == ISD::SETCC) {
9590 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9591 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9592
9593 // select (fcmp lt x, y), x, y -> fminnum x, y
9594 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9595 //
9596 // This is OK if we don't care what happens if either operand is a NaN.
9597 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9598 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9599 CC, TLI, DAG))
9600 return FMinMax;
9601
9602 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9603 // This is conservatively limited to pre-legal-operations to give targets
9604 // a chance to reverse the transform if they want to do that. Also, it is
9605 // unlikely that the pattern would be formed late, so it's probably not
9606 // worth going through the other checks.
9607 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9608 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9609 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9610 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9611 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9612 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9613 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9614 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9615 //
9616 // The IR equivalent of this transform would have this form:
9617 // %a = add %x, C
9618 // %c = icmp ugt %x, ~C
9619 // %r = select %c, -1, %a
9620 // =>
9621 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9622 // %u0 = extractvalue %u, 0
9623 // %u1 = extractvalue %u, 1
9624 // %r = select %u1, -1, %u0
9625 SDVTList VTs = DAG.getVTList(VT, VT0);
9626 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9627 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9628 }
9629 }
9630
9631 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9632 (!LegalOperations &&
9633 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9634 // Any flags available in a select/setcc fold will be on the setcc as they
9635 // migrated from fcmp
9636 Flags = N0.getNode()->getFlags();
9637 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9638 N2, N0.getOperand(2));
9639 SelectNode->setFlags(Flags);
9640 return SelectNode;
9641 }
9642
9643 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9644 return NewSel;
9645 }
9646
9647 if (!VT.isVector())
9648 if (SDValue BinOp = foldSelectOfBinops(N))
9649 return BinOp;
9650
9651 return SDValue();
9652}
9653
9654// This function assumes all the vselect's arguments are CONCAT_VECTOR
9655// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9656static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9657 SDLoc DL(N);
9658 SDValue Cond = N->getOperand(0);
9659 SDValue LHS = N->getOperand(1);
9660 SDValue RHS = N->getOperand(2);
9661 EVT VT = N->getValueType(0);
9662 int NumElems = VT.getVectorNumElements();
9663 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9664 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9665 Cond.getOpcode() == ISD::BUILD_VECTOR);
9666
9667 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
9668 // binary ones here.
9669 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9670 return SDValue();
9671
9672 // We're sure we have an even number of elements due to the
9673 // concat_vectors we have as arguments to vselect.
9674 // Skip BV elements until we find one that's not an UNDEF.
9675 // After we find a non-UNDEF element, keep looping until we get to half the
9676 // length of the BV and see if all the non-undef nodes are the same.
9677 ConstantSDNode *BottomHalf = nullptr;
9678 for (int i = 0; i < NumElems / 2; ++i) {
9679 if (Cond->getOperand(i)->isUndef())
9680 continue;
9681
9682 if (BottomHalf == nullptr)
9683 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9684 else if (Cond->getOperand(i).getNode() != BottomHalf)
9685 return SDValue();
9686 }
9687
9688 // Do the same for the second half of the BuildVector
9689 ConstantSDNode *TopHalf = nullptr;
9690 for (int i = NumElems / 2; i < NumElems; ++i) {
9691 if (Cond->getOperand(i)->isUndef())
9692 continue;
9693
9694 if (TopHalf == nullptr)
9695 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9696 else if (Cond->getOperand(i).getNode() != TopHalf)
9697 return SDValue();
9698 }
9699
9700 assert(TopHalf && BottomHalf &&
9701 "One half of the selector was all UNDEFs and the other was all the "
9702 "same value. This should have been addressed before this function.");
9703 return DAG.getNode(
9704 ISD::CONCAT_VECTORS, DL, VT,
9705 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9706 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9707}
9708
9709bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9710 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9711 return false;
9712
9713 // For now we check only the LHS of the add.
9714 SDValue LHS = Index.getOperand(0);
9715 SDValue SplatVal = DAG.getSplatValue(LHS);
9716 if (!SplatVal)
9717 return false;
9718
9719 BasePtr = SplatVal;
9720 Index = Index.getOperand(1);
9721 return true;
9722}
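// [Editorial sketch] Example of the base-pointer refinement above: a gather or
// scatter built with BasePtr = 0 and Index = (add (splat %p), %offsets) is
// rewritten to use BasePtr = %p and Index = %offsets, assuming the splat value
// can be recovered from the left-hand operand of the add.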
9723
9724// Fold sext/zext of index into index type.
9725bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9726 bool Scaled, SelectionDAG &DAG) {
9727 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9728
9729 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9730 SDValue Op = Index.getOperand(0);
9731 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9732 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9733 Index = Op;
9734 return true;
9735 }
9736 }
9737
9738 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9739 SDValue Op = Index.getOperand(0);
9740 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9741 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9742 Index = Op;
9743 return true;
9744 }
9745 }
9746
9747 return false;
9748}
9749
9750SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9751 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9752 SDValue Mask = MSC->getMask();
9753 SDValue Chain = MSC->getChain();
9754 SDValue Index = MSC->getIndex();
9755 SDValue Scale = MSC->getScale();
9756 SDValue StoreVal = MSC->getValue();
9757 SDValue BasePtr = MSC->getBasePtr();
9758 SDLoc DL(N);
9759
9760 // Zap scatters with a zero mask.
9761 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9762 return Chain;
9763
9764 if (refineUniformBase(BasePtr, Index, DAG)) {
9765 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9766 return DAG.getMaskedScatter(
9767 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9768 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9769 }
9770
9771 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9772 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9773 return DAG.getMaskedScatter(
9774 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9775 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9776 }
9777
9778 return SDValue();
9779}
9780
9781SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9782 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9783 SDValue Mask = MST->getMask();
9784 SDValue Chain = MST->getChain();
9785 SDLoc DL(N);
9786
9787 // Zap masked stores with a zero mask.
9788 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9789 return Chain;
9790
9791 // If this is a masked store with an all-ones mask, we can use an unmasked store.
9792 // FIXME: Can we do this for indexed, compressing, or truncating stores?
9793 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9794 MST->isUnindexed() && !MST->isCompressingStore() &&
9795 !MST->isTruncatingStore())
9796 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9797 MST->getBasePtr(), MST->getMemOperand());
9798
9799 // Try transforming N to an indexed store.
9800 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9801 return SDValue(N, 0);
9802
9803 return SDValue();
9804}
9805
9806SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9807 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9808 SDValue Mask = MGT->getMask();
9809 SDValue Chain = MGT->getChain();
9810 SDValue Index = MGT->getIndex();
9811 SDValue Scale = MGT->getScale();
9812 SDValue PassThru = MGT->getPassThru();
9813 SDValue BasePtr = MGT->getBasePtr();
9814 SDLoc DL(N);
9815
9816 // Zap gathers with a zero mask.
9817 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9818 return CombineTo(N, PassThru, MGT->getChain());
9819
9820 if (refineUniformBase(BasePtr, Index, DAG)) {
9821 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9822 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9823 MGT->getMemoryVT(), DL, Ops,
9824 MGT->getMemOperand(), MGT->getIndexType(),
9825 MGT->getExtensionType());
9826 }
9827
9828 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9829 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9830 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9831 MGT->getMemoryVT(), DL, Ops,
9832 MGT->getMemOperand(), MGT->getIndexType(),
9833 MGT->getExtensionType());
9834 }
9835
9836 return SDValue();
9837}
9838
9839SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9840 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9841 SDValue Mask = MLD->getMask();
9842 SDLoc DL(N);
9843
9844 // Zap masked loads with a zero mask.
9845 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9846 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9847
9848 // If this is a masked load with an all-ones mask, we can use an unmasked load.
9849 // FIXME: Can we do this for indexed, expanding, or extending loads?
9850 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9851 MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9852 MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9853 SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9854 MLD->getBasePtr(), MLD->getMemOperand());
9855 return CombineTo(N, NewLd, NewLd.getValue(1));
9856 }
9857
9858 // Try transforming N to an indexed load.
9859 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9860 return SDValue(N, 0);
9861
9862 return SDValue();
9863}
9864
9865/// A vector select of 2 constant vectors can be simplified to math/logic to
9866/// avoid a variable select instruction and possibly avoid constant loads.
9867SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9868 SDValue Cond = N->getOperand(0);
9869 SDValue N1 = N->getOperand(1);
9870 SDValue N2 = N->getOperand(2);
9871 EVT VT = N->getValueType(0);
9872 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9873 !TLI.convertSelectOfConstantsToMath(VT) ||
9874 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9875 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9876 return SDValue();
9877
9878 // Check if we can use the condition value to increment/decrement a single
9879 // constant value. This simplifies a select to an add and removes a constant
9880 // load/materialization from the general case.
9881 bool AllAddOne = true;
9882 bool AllSubOne = true;
9883 unsigned Elts = VT.getVectorNumElements();
9884 for (unsigned i = 0; i != Elts; ++i) {
9885 SDValue N1Elt = N1.getOperand(i);
9886 SDValue N2Elt = N2.getOperand(i);
9887 if (N1Elt.isUndef() || N2Elt.isUndef())
9888 continue;
9889 if (N1Elt.getValueType() != N2Elt.getValueType())
9890 continue;
9891
9892 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9893 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9894 if (C1 != C2 + 1)
9895 AllAddOne = false;
9896 if (C1 != C2 - 1)
9897 AllSubOne = false;
9898 }
9899
9900 // Further simplifications for the extra-special cases where the constants are
9901 // all 0 or all -1 should be implemented as folds of these patterns.
9902 SDLoc DL(N);
9903 if (AllAddOne || AllSubOne) {
9904 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9905 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9906 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9907 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9908 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9909 }
9910
9911 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9912 APInt Pow2C;
9913 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9914 isNullOrNullSplat(N2)) {
9915 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9916 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9917 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9918 }
9919
9920 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9921 return V;
9922
9923 // The general case for select-of-constants:
9924 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9925 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9926 // leave that to a machine-specific pass.
9927 return SDValue();
9928}
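// [Editorial sketch] Examples of the vector select-of-constants folds above,
// with hypothetical <2 x i32> constants (target opted in via
// convertSelectOfConstantsToMath()):
//   vselect Cond, <3,3>, <2,2> --> add (zext Cond), <2,2>   // every lane C+1 vs C
//   vselect Cond, <1,1>, <2,2> --> add (sext Cond), <2,2>   // every lane C-1 vs C
//   vselect Cond, <8,8>, <0,0> --> shl (zext Cond), <3,3>   // power of 2 vs zero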
9929
9930SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9931 SDValue N0 = N->getOperand(0);
9932 SDValue N1 = N->getOperand(1);
9933 SDValue N2 = N->getOperand(2);
9934 EVT VT = N->getValueType(0);
9935 SDLoc DL(N);
9936
9937 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9938 return V;
9939
9940 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9941 return V;
9942
9943 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9944 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9945 return DAG.getSelect(DL, VT, F, N2, N1);
9946
9947 // Canonicalize integer abs.
9948 // vselect (setg[te] X, 0), X, -X ->
9949 // vselect (setgt X, -1), X, -X ->
9950 // vselect (setl[te] X, 0), -X, X ->
9951 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9952 if (N0.getOpcode() == ISD::SETCC) {
9953 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9954 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9955 bool isAbs = false;
9956 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9957
9958 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9959 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9960 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9961 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9962 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9963 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9964 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9965
9966 if (isAbs) {
9967 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9968 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9969
9970 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9971 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9972 DL, getShiftAmountTy(VT)));
9973 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9974 AddToWorklist(Shift.getNode());
9975 AddToWorklist(Add.getNode());
9976 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9977 }
9978
9979 // vselect x, y (fcmp lt x, y) -> fminnum x, y
9980 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9981 //
9982 // This is OK if we don't care about what happens if either operand is a
9983 // NaN.
9984 //
9985 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9986 if (SDValue FMinMax =
9987 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9988 return FMinMax;
9989 }
9990
9991 // If this select has a condition (setcc) with narrower operands than the
9992 // select, try to widen the compare to match the select width.
9993 // TODO: This should be extended to handle any constant.
9994 // TODO: This could be extended to handle non-loading patterns, but that
9995 // requires thorough testing to avoid regressions.
9996 if (isNullOrNullSplat(RHS)) {
9997 EVT NarrowVT = LHS.getValueType();
9998 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9999 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10000 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10001 unsigned WideWidth = WideVT.getScalarSizeInBits();
10002 bool IsSigned = isSignedIntSetCC(CC);
10003 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10004 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10005 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10006 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10007 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10008 // Both compare operands can be widened for free. The LHS can use an
10009 // extended load, and the RHS is a constant:
10010 // vselect (ext (setcc load(X), C)), N1, N2 -->
10011 // vselect (setcc extload(X), C'), N1, N2
10012 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10013 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10014 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10015 EVT WideSetCCVT = getSetCCResultType(WideVT);
10016 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10017 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10018 }
10019 }
10020
10021 // Match VSELECTs into add with unsigned saturation.
10022 if (hasOperation(ISD::UADDSAT, VT)) {
10023 // Check if one of the arms of the VSELECT is a vector with all bits set.
10024 // If it's on the left side invert the predicate to simplify logic below.
10025 SDValue Other;
10026 ISD::CondCode SatCC = CC;
10027 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10028 Other = N2;
10029 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10030 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10031 Other = N1;
10032 }
10033
10034 if (Other && Other.getOpcode() == ISD::ADD) {
10035 SDValue CondLHS = LHS, CondRHS = RHS;
10036 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10037
10038 // Canonicalize condition operands.
10039 if (SatCC == ISD::SETUGE) {
10040 std::swap(CondLHS, CondRHS);
10041 SatCC = ISD::SETULE;
10042 }
10043
10044 // We can test against either of the addition operands.
10045 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10046 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10047 if (SatCC == ISD::SETULE && Other == CondRHS &&
10048 (OpLHS == CondLHS || OpRHS == CondLHS))
10049 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10050
10051 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10052 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10053 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10054 CondLHS == OpLHS) {
10055 // If the RHS is a constant we have to reverse the const
10056 // canonicalization.
10057 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10058 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10059 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10060 };
10061 if (SatCC == ISD::SETULE &&
10062 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10063 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10064 }
10065 }
10066 }
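// Editorial sketch (not part of DAGCombiner.cpp): the UADDSAT patterns above encode
// the scalar identity (x <= x+y ? x+y : ~0) == uaddsat(x, y) in wraparound unsigned
// arithmetic, since the sum wrapped exactly when x + y < x. A minimal 32-bit version:
#include <cstdint>
static uint32_t uaddsat32(uint32_t x, uint32_t y) {
  uint32_t Sum = x + y;                 // wraps modulo 2^32
  return Sum >= x ? Sum : UINT32_MAX;   // select ~0 on overflow
}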
10067
10068 // Match VSELECTs into sub with unsigned saturation.
10069 if (hasOperation(ISD::USUBSAT, VT)) {
10070 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10071 // the left side, invert the predicate to simplify the logic below.
10072 SDValue Other;
10073 ISD::CondCode SatCC = CC;
10074 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10075 Other = N2;
10076 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10077 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10078 Other = N1;
10079 }
10080
10081 if (Other && Other.getNumOperands() == 2) {
10082 SDValue CondRHS = RHS;
10083 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10084
10085 if (Other.getOpcode() == ISD::SUB &&
10086 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10087 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10088 // Look for a general sub with unsigned saturation first.
10089 // zext(x) >= y ? x - trunc(y) : 0
10090 // --> usubsat(x,trunc(umin(y,SatLimit)))
10091 // zext(x) > y ? x - trunc(y) : 0
10092 // --> usubsat(x,trunc(umin(y,SatLimit)))
10093 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10094 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10095 DL);
10096 }
10097
10098 if (OpLHS == LHS) {
10099 // Look for a general sub with unsigned saturation first.
10100 // x >= y ? x-y : 0 --> usubsat x, y
10101 // x > y ? x-y : 0 --> usubsat x, y
10102 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10103 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10104 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10105
10106 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10107 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10108 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10109 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10110 // If the RHS is a constant we have to reverse the const
10111 // canonicalization.
10112 // x > C-1 ? x+-C : 0 --> usubsat x, C
10113 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10114 return (!Op && !Cond) ||
10115 (Op && Cond &&
10116 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10117 };
10118 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10119 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10120 /*AllowUndefs*/ true)) {
10121 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10122 DAG.getConstant(0, DL, VT), OpRHS);
10123 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10124 }
10125
10126 // Another special case: If C was a sign bit, the sub has been
10127 // canonicalized into a xor.
10128 // FIXME: Would it be better to use computeKnownBits to determine
10129 // whether it's safe to decanonicalize the xor?
10130 // x s< 0 ? x^C : 0 --> usubsat x, C
10131 APInt SplatValue;
10132 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10133 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10134 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10135 SplatValue.isSignMask()) {
10136 // Note that we have to rebuild the RHS constant here to
10137 // ensure we don't rely on particular values of undef lanes.
10138 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10139 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10140 }
10141 }
10142 }
10143 }
10144 }
10145 }
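// Editorial sketch (not part of DAGCombiner.cpp): the USUBSAT patterns above encode
// the scalar identity (x >= y ? x-y : 0) == usubsat(x, y) for unsigned values:
#include <cstdint>
static uint32_t usubsat32(uint32_t x, uint32_t y) {
  return x >= y ? x - y : 0;            // saturating unsigned subtraction
}
// The xor special case follows because, when C is the sign mask, x ^ C == x - C
// whenever the sign bit of x is set, and usubsat(x, C) == 0 whenever it is clear.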
10146 }
10147
10148 if (SimplifySelectOps(N, N1, N2))
10149 return SDValue(N, 0); // Don't revisit N.
10150
10151 // Fold (vselect all_ones, N1, N2) -> N1
10152 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10153 return N1;
10154 // Fold (vselect all_zeros, N1, N2) -> N2
10155 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10156 return N2;
10157
10158 // The ConvertSelectToConcatVector function assumes both of the above
10159 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10160 // and addressed.
10161 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10162 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10163 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10164 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10165 return CV;
10166 }
10167
10168 if (SDValue V = foldVSelectOfConstants(N))
10169 return V;
10170
10171 return SDValue();
10172}
10173
10174SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10175 SDValue N0 = N->getOperand(0);
10176 SDValue N1 = N->getOperand(1);
10177 SDValue N2 = N->getOperand(2);
10178 SDValue N3 = N->getOperand(3);
10179 SDValue N4 = N->getOperand(4);
10180 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10181
10182 // fold select_cc lhs, rhs, x, x, cc -> x
10183 if (N2 == N3)
10184 return N2;
10185
10186 // Determine if the condition we're dealing with is constant
10187 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10188 CC, SDLoc(N), false)) {
10189 AddToWorklist(SCC.getNode());
10190
10191 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10192 if (!SCCC->isNullValue())
10193 return N2; // cond always true -> true val
10194 else
10195 return N3; // cond always false -> false val
10196 } else if (SCC->isUndef()) {
10197 // When the condition is UNDEF, just return the first operand. This is
10198 // coherent with DAG creation: no setcc node is created in this case.
10199 return N2;
10200 } else if (SCC.getOpcode() == ISD::SETCC) {
10201 // Fold to a simpler select_cc
10202 SDValue SelectOp = DAG.getNode(
10203 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10204 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10205 SelectOp->setFlags(SCC->getFlags());
10206 return SelectOp;
10207 }
10208 }
10209
10210 // If we can fold this based on the true/false value, do so.
10211 if (SimplifySelectOps(N, N2, N3))
10212 return SDValue(N, 0); // Don't revisit N.
10213
10214 // fold select_cc into other things, such as min/max/abs
10215 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10216}
10217
10218SDValue DAGCombiner::visitSETCC(SDNode *N) {
10219 // setcc is very commonly used as an argument to brcond. This pattern
10220 // also lends itself to numerous combines and, as a result, it is desirable
10221 // to keep the argument to a brcond as a setcc as much as possible.
10222 bool PreferSetCC =
10223 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10224
10225 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10226 EVT VT = N->getValueType(0);
10227
10228 // SETCC(FREEZE(X), CONST, Cond)
10229 // =>
10230 // FREEZE(SETCC(X, CONST, Cond))
10231 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10232 // isn't equivalent to true or false.
10233 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10234 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10235 //
10236 // This transformation is beneficial because visitBRCOND can fold
10237 // BRCOND(FREEZE(X)) to BRCOND(X).
10238
10239 // Conservatively optimize integer comparisons only.
10240 if (PreferSetCC) {
10241 // Do this only when SETCC is going to be used by BRCOND.
10242
10243 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10244 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10245 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10246 bool Updated = false;
10247
10248 // Is 'X Cond C' always true or false?
10249 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10250 bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
10251 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10252 (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
10253 (Cond == ISD::SETGT && C->isMaxSignedValue());
10254 bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
10255 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10256 (Cond == ISD::SETUGE && C->isNullValue()) ||
10257 (Cond == ISD::SETGE && C->isMinSignedValue());
10258 return True || False;
10259 };
10260
10261 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10262 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10263 N0 = N0->getOperand(0);
10264 Updated = true;
10265 }
10266 }
10267 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10268 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10269 N0C)) {
10270 N1 = N1->getOperand(0);
10271 Updated = true;
10272 }
10273 }
10274
10275 if (Updated)
10276 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10277 }
10278
10279 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10280 SDLoc(N), !PreferSetCC);
10281
10282 if (!Combined)
10283 return SDValue();
10284
10285 // If we prefer to have a setcc, and we don't, we'll try our best to
10286 // recreate one using rebuildSetCC.
10287 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10288 SDValue NewSetCC = rebuildSetCC(Combined);
10289
10290 // We don't have anything interesting to combine to.
10291 if (NewSetCC.getNode() == N)
10292 return SDValue();
10293
10294 if (NewSetCC)
10295 return NewSetCC;
10296 }
10297
10298 return Combined;
10299}
10300
10301SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10302 SDValue LHS = N->getOperand(0);
10303 SDValue RHS = N->getOperand(1);
10304 SDValue Carry = N->getOperand(2);
10305 SDValue Cond = N->getOperand(3);
10306
10307 // If Carry is false, fold to a regular SETCC.
10308 if (isNullConstant(Carry))
10309 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10310
10311 return SDValue();
10312}
10313
10314/// Check if N satisfies:
10315///   N is used once.
10316///   N is a load.
10317///   The load is compatible with ExtOpcode, meaning that if the load has an
10318///   explicit zero/sign extension, ExtOpcode must request the same kind of
10319///   extension.
10320///   Otherwise (no explicit extension on the load), any ExtOpcode is compatible.
10321static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10322 if (!N.hasOneUse())
10323 return false;
10324
10325 if (!isa<LoadSDNode>(N))
10326 return false;
10327
10328 LoadSDNode *Load = cast<LoadSDNode>(N);
10329 ISD::LoadExtType LoadExt = Load->getExtensionType();
10330 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10331 return true;
10332
10333 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10334 // extension.
10335 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10336 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10337 return false;
10338
10339 return true;
10340}
10341
10342/// Fold
10343/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10344/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10345/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10346/// This function is called by the DAGCombiner when visiting sext/zext/aext
10347/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10348static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10349 SelectionDAG &DAG) {
10350 unsigned Opcode = N->getOpcode();
10351 SDValue N0 = N->getOperand(0);
10352 EVT VT = N->getValueType(0);
10353 SDLoc DL(N);
10354
10355 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10356 Opcode == ISD::ANY_EXTEND) &&
10357 "Expected EXTEND dag node in input!");
10358
10359 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10360 !N0.hasOneUse())
10361 return SDValue();
10362
10363 SDValue Op1 = N0->getOperand(1);
10364 SDValue Op2 = N0->getOperand(2);
10365 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10366 return SDValue();
10367
10368 auto ExtLoadOpcode = ISD::EXTLOAD;
10369 if (Opcode == ISD::SIGN_EXTEND)
10370 ExtLoadOpcode = ISD::SEXTLOAD;
10371 else if (Opcode == ISD::ZERO_EXTEND)
10372 ExtLoadOpcode = ISD::ZEXTLOAD;
10373
10374 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10375 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10376 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10377 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10378 return SDValue();
10379
10380 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10381 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10382 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10383}
10384
10385/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10386/// a build_vector of constants.
10387/// This function is called by the DAGCombiner when visiting sext/zext/aext
10388/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10389/// Vector extends are not folded if operations are legal; this is to
10390/// avoid introducing illegal build_vector dag nodes.
10391static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10392 SelectionDAG &DAG, bool LegalTypes) {
10393 unsigned Opcode = N->getOpcode();
10394 SDValue N0 = N->getOperand(0);
10395 EVT VT = N->getValueType(0);
10396 SDLoc DL(N);
10397
10398 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10399 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10400 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10401 && "Expected EXTEND dag node in input!");
10402
10403 // fold (sext c1) -> c1
10404 // fold (zext c1) -> c1
10405 // fold (aext c1) -> c1
10406 if (isa<ConstantSDNode>(N0))
10407 return DAG.getNode(Opcode, DL, VT, N0);
10408
10409 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10410 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10411 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10412 if (N0->getOpcode() == ISD::SELECT) {
10413 SDValue Op1 = N0->getOperand(1);
10414 SDValue Op2 = N0->getOperand(2);
10415 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10416 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10417 // For any_extend, choose sign extension of the constants to allow a
10418 // possible further transform to sign_extend_inreg, i.e.:
10419 //
10420 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10421 // t2: i64 = any_extend t1
10422 // -->
10423 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10424 // -->
10425 // t4: i64 = sign_extend_inreg t3
10426 unsigned FoldOpc = Opcode;
10427 if (FoldOpc == ISD::ANY_EXTEND)
10428 FoldOpc = ISD::SIGN_EXTEND;
10429 return DAG.getSelect(DL, VT, N0->getOperand(0),
10430 DAG.getNode(FoldOpc, DL, VT, Op1),
10431 DAG.getNode(FoldOpc, DL, VT, Op2));
10432 }
10433 }
10434
10435 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
10436 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
10437 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10438 EVT SVT = VT.getScalarType();
10439 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10440 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10441 return SDValue();
10442
10443 // We can fold this node into a build_vector.
10444 unsigned VTBits = SVT.getSizeInBits();
10445 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10446 SmallVector<SDValue, 8> Elts;
10447 unsigned NumElts = VT.getVectorNumElements();
10448
10449 // For zero-extensions, UNDEF elements are still guaranteed to have their
10450 // upper bits set to zero.
10451 bool IsZext =
10452 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10453
10454 for (unsigned i = 0; i != NumElts; ++i) {
10455 SDValue Op = N0.getOperand(i);
10456 if (Op.isUndef()) {
10457 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10458 continue;
10459 }
10460
10461 SDLoc DL(Op);
10462 // Get the constant value and if needed trunc it to the size of the type.
10463 // Nodes like build_vector might have constants wider than the scalar type.
10464 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10465 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10466 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10467 else
10468 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10469 }
10470
10471 return DAG.getBuildVector(VT, DL, Elts);
10472}
10473
10474// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
10475// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10476// transformation. Returns true if the extensions are possible and the
10477// above-mentioned transformation is profitable.
10478static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10479 unsigned ExtOpc,
10480 SmallVectorImpl<SDNode *> &ExtendNodes,
10481 const TargetLowering &TLI) {
10482 bool HasCopyToRegUses = false;
10483 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10484 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10485 UE = N0.getNode()->use_end();
10486 UI != UE; ++UI) {
10487 SDNode *User = *UI;
10488 if (User == N)
10489 continue;
10490 if (UI.getUse().getResNo() != N0.getResNo())
10491 continue;
10492 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10493 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10494 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10495 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10496 // Sign bits will be lost after a zext.
10497 return false;
10498 bool Add = false;
10499 for (unsigned i = 0; i != 2; ++i) {
10500 SDValue UseOp = User->getOperand(i);
10501 if (UseOp == N0)
10502 continue;
10503 if (!isa<ConstantSDNode>(UseOp))
10504 return false;
10505 Add = true;
10506 }
10507 if (Add)
10508 ExtendNodes.push_back(User);
10509 continue;
10510 }
10511 // If truncates aren't free and there are users we can't
10512 // extend, it isn't worthwhile.
10513 if (!isTruncFree)
10514 return false;
10515 // Remember if this value is live-out.
10516 if (User->getOpcode() == ISD::CopyToReg)
10517 HasCopyToRegUses = true;
10518 }
10519
10520 if (HasCopyToRegUses) {
10521 bool BothLiveOut = false;
10522 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10523 UI != UE; ++UI) {
10524 SDUse &Use = UI.getUse();
10525 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10526 BothLiveOut = true;
10527 break;
10528 }
10529 }
10530 if (BothLiveOut)
10531 // Both unextended and extended values are live out. There had better be
10532 // a good reason for the transformation.
10533 return ExtendNodes.size();
10534 }
10535 return true;
10536}
10537
10538void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10539 SDValue OrigLoad, SDValue ExtLoad,
10540 ISD::NodeType ExtType) {
10541 // Extend SetCC uses if necessary.
10542 SDLoc DL(ExtLoad);
10543 for (SDNode *SetCC : SetCCs) {
10544 SmallVector<SDValue, 4> Ops;
10545
10546 for (unsigned j = 0; j != 2; ++j) {
10547 SDValue SOp = SetCC->getOperand(j);
10548 if (SOp == OrigLoad)
10549 Ops.push_back(ExtLoad);
10550 else
10551 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10552 }
10553
10554 Ops.push_back(SetCC->getOperand(2));
10555 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10556 }
10557}
10558
10559// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10560SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10561 SDValue N0 = N->getOperand(0);
10562 EVT DstVT = N->getValueType(0);
10563 EVT SrcVT = N0.getValueType();
10564
10565 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10566 N->getOpcode() == ISD::ZERO_EXTEND) &&
10567 "Unexpected node type (not an extend)!");
10568
10569 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10570 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10571 // (v8i32 (sext (v8i16 (load x))))
10572 // into:
10573 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10574 // (v4i32 (sextload (x + 16)))))
10575 // Where uses of the original load, i.e.:
10576 // (v8i16 (load x))
10577 // are replaced with:
10578 // (v8i16 (truncate
10579 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10580 // (v4i32 (sextload (x + 16)))))))
10581 //
10582 // This combine is only applicable to illegal, but splittable, vectors.
10583 // All legal types, and illegal non-vector types, are handled elsewhere.
10584 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10585 //
10586 if (N0->getOpcode() != ISD::LOAD)
10587 return SDValue();
10588
10589 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10590
10591 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10592 !N0.hasOneUse() || !LN0->isSimple() ||
10593 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10594 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10595 return SDValue();
10596
10597 SmallVector<SDNode *, 4> SetCCs;
10598 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10599 return SDValue();
10600
10601 ISD::LoadExtType ExtType =
10602 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10603
10604 // Try to split the vector types to get down to legal types.
10605 EVT SplitSrcVT = SrcVT;
10606 EVT SplitDstVT = DstVT;
10607 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10608 SplitSrcVT.getVectorNumElements() > 1) {
10609 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10610 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10611 }
10612
10613 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10614 return SDValue();
10615
10616 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10617
10618 SDLoc DL(N);
10619 const unsigned NumSplits =
10620 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10621 const unsigned Stride = SplitSrcVT.getStoreSize();
10622 SmallVector<SDValue, 4> Loads;
10623 SmallVector<SDValue, 4> Chains;
10624
10625 SDValue BasePtr = LN0->getBasePtr();
10626 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10627 const unsigned Offset = Idx * Stride;
10628 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10629
10630 SDValue SplitLoad = DAG.getExtLoad(
10631 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10632 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10633 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10634
10635 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10636
10637 Loads.push_back(SplitLoad.getValue(0));
10638 Chains.push_back(SplitLoad.getValue(1));
10639 }
10640
10641 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10642 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10643
10644 // Simplify TF.
10645 AddToWorklist(NewChain.getNode());
10646
10647 CombineTo(N, NewValue);
10648
10649 // Replace uses of the original load (before extension)
10650 // with a truncate of the concatenated sextloaded vectors.
10651 SDValue Trunc =
10652 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10653 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10654 CombineTo(N0.getNode(), Trunc, NewChain);
10655 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10656}
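// Editorial sketch (not part of DAGCombiner.cpp): the split performed above is the
// lane-wise identity that sign-extending a v8i16 load equals concatenating the
// sign extensions of its two v4i16 halves, e.g. in scalar form:
#include <cstdint>
static void sextLoadSplit(const int16_t Src[8], int32_t Lo[4], int32_t Hi[4]) {
  for (int i = 0; i < 4; ++i) {
    Lo[i] = Src[i];       // sign extension of the low half, lane by lane
    Hi[i] = Src[i + 4];   // sign extension of the high half, lane by lane
  }
}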
10657
10658// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10659// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10660SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10661 assert(N->getOpcode() == ISD::ZERO_EXTEND);
10662 EVT VT = N->getValueType(0);
10663 EVT OrigVT = N->getOperand(0).getValueType();
10664 if (TLI.isZExtFree(OrigVT, VT))
10665 return SDValue();
10666
10667 // and/or/xor
10668 SDValue N0 = N->getOperand(0);
10669 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10670 N0.getOpcode() == ISD::XOR) ||
10671 N0.getOperand(1).getOpcode() != ISD::Constant ||
10672 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10673 return SDValue();
10674
10675 // shl/shr
10676 SDValue N1 = N0->getOperand(0);
10677 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10678 N1.getOperand(1).getOpcode() != ISD::Constant ||
10679 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10680 return SDValue();
10681
10682 // load
10683 if (!isa<LoadSDNode>(N1.getOperand(0)))
10684 return SDValue();
10685 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10686 EVT MemVT = Load->getMemoryVT();
10687 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10688 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10689 return SDValue();
10690
10691
10692 // If the shift op is SHL, the logic op must be AND, otherwise the result
10693 // will be wrong.
10694 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10695 return SDValue();
10696
10697 if (!N0.hasOneUse() || !N1.hasOneUse())
10698 return SDValue();
10699
10700 SmallVector<SDNode*, 4> SetCCs;
10701 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10702 ISD::ZERO_EXTEND, SetCCs, TLI))
10703 return SDValue();
10704
10705 // Actually do the transformation.
10706 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10707 Load->getChain(), Load->getBasePtr(),
10708 Load->getMemoryVT(), Load->getMemOperand());
10709
10710 SDLoc DL1(N1);
10711 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10712 N1.getOperand(1));
10713
10714 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10715 SDLoc DL0(N0);
10716 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10717 DAG.getConstant(Mask, DL0, VT));
10718
10719 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10720 CombineTo(N, And);
10721 if (SDValue(Load, 0).hasOneUse()) {
10722 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10723 } else {
10724 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10725 Load->getValueType(0), ExtLoad);
10726 CombineTo(Load, Trunc, ExtLoad.getValue(1));
10727 }
10728
10729 // N0 is dead at this point.
10730 recursivelyDeleteUnusedNodes(N0.getNode());
10731
10732 return SDValue(N,0); // Return N so it doesn't get rechecked!
10733}
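// Editorial sketch (not part of DAGCombiner.cpp): for the SRL case the fold above is
// justified by zext((x >>u s) & c) == (zext(x) >>u s) & zext(c), because a narrow
// logical shift right never creates bits above the narrow width. For SHL the wide
// shift can keep bits the narrow shift would have discarded, which is why the logic
// op must then be AND: the zero-extended mask clears those extra high bits again.
#include <cstdint>
static uint32_t zextSrlAnd(uint8_t x, unsigned s, uint8_t c) {
  // For s < 8 this equals (uint32_t)(uint8_t)((x >> s) & c).
  return ((uint32_t)x >> s) & (uint32_t)c;
}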
10734
10735/// If we're narrowing or widening the result of a vector select and the final
10736/// size is the same size as a setcc (compare) feeding the select, then try to
10737/// apply the cast operation to the select's operands because matching vector
10738/// sizes for a select condition and other operands should be more efficient.
10739SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10740 unsigned CastOpcode = Cast->getOpcode();
10741 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10742 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10743 CastOpcode == ISD::FP_ROUND) &&
10744 "Unexpected opcode for vector select narrowing/widening");
10745
10746 // We only do this transform before legal ops because the pattern may be
10747 // obfuscated by target-specific operations after legalization. Do not create
10748 // an illegal select op, however, because that may be difficult to lower.
10749 EVT VT = Cast->getValueType(0);
10750 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10751 return SDValue();
10752
10753 SDValue VSel = Cast->getOperand(0);
10754 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10755 VSel.getOperand(0).getOpcode() != ISD::SETCC)
10756 return SDValue();
10757
10758 // Does the setcc have the same vector size as the casted select?
10759 SDValue SetCC = VSel.getOperand(0);
10760 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10761 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10762 return SDValue();
10763
10764 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10765 SDValue A = VSel.getOperand(1);
10766 SDValue B = VSel.getOperand(2);
10767 SDValue CastA, CastB;
10768 SDLoc DL(Cast);
10769 if (CastOpcode == ISD::FP_ROUND) {
10770 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10771 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10772 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10773 } else {
10774 CastA = DAG.getNode(CastOpcode, DL, VT, A);
10775 CastB = DAG.getNode(CastOpcode, DL, VT, B);
10776 }
10777 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10778}
10779
10780// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10781// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10782static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10783 const TargetLowering &TLI, EVT VT,
10784 bool LegalOperations, SDNode *N,
10785 SDValue N0, ISD::LoadExtType ExtLoadType) {
10786 SDNode *N0Node = N0.getNode();
10787 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10788 : ISD::isZEXTLoad(N0Node);
10789 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10790 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10791 return SDValue();
10792
10793 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10794 EVT MemVT = LN0->getMemoryVT();
10795 if ((LegalOperations || !LN0->isSimple() ||
10796 VT.isVector()) &&
10797 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10798 return SDValue();
10799
10800 SDValue ExtLoad =
10801 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10802 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10803 Combiner.CombineTo(N, ExtLoad);
10804 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10805 if (LN0->use_empty())
10806 Combiner.recursivelyDeleteUnusedNodes(LN0);
10807 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10808}
10809
10810// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10811// Only generate vector extloads when 1) they're legal, and 2) they are
10812// deemed desirable by the target.
10813static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10814 const TargetLowering &TLI, EVT VT,
10815 bool LegalOperations, SDNode *N, SDValue N0,
10816 ISD::LoadExtType ExtLoadType,
10817 ISD::NodeType ExtOpc) {
10818 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10819 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10820 ((LegalOperations || VT.isVector() ||
10821 !cast<LoadSDNode>(N0)->isSimple()) &&
10822 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10823 return {};
10824
10825 bool DoXform = true;
10826 SmallVector<SDNode *, 4> SetCCs;
10827 if (!N0.hasOneUse())
10828 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10829 if (VT.isVector())
10830 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10831 if (!DoXform)
10832 return {};
10833
10834 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10835 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10836 LN0->getBasePtr(), N0.getValueType(),
10837 LN0->getMemOperand());
10838 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10839 // If the load value is used only by N, replace it via CombineTo N.
10840 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10841 Combiner.CombineTo(N, ExtLoad);
10842 if (NoReplaceTrunc) {
10843 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10844 Combiner.recursivelyDeleteUnusedNodes(LN0);
10845 } else {
10846 SDValue Trunc =
10847 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10848 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10849 }
10850 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10851}
10852
10853static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10854 const TargetLowering &TLI, EVT VT,
10855 SDNode *N, SDValue N0,
10856 ISD::LoadExtType ExtLoadType,
10857 ISD::NodeType ExtOpc) {
10858 if (!N0.hasOneUse())
10859 return SDValue();
10860
10861 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10862 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10863 return SDValue();
10864
10865 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10866 return SDValue();
10867
10868 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10869 return SDValue();
10870
10871 SDLoc dl(Ld);
10872 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10873 SDValue NewLoad = DAG.getMaskedLoad(
10874 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10875 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10876 ExtLoadType, Ld->isExpandingLoad());
10877 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10878 return NewLoad;
10879}
10880
10881static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10882 bool LegalOperations) {
10883 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10884 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10885
10886 SDValue SetCC = N->getOperand(0);
10887 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10888 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10889 return SDValue();
10890
10891 SDValue X = SetCC.getOperand(0);
10892 SDValue Ones = SetCC.getOperand(1);
10893 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10894 EVT VT = N->getValueType(0);
10895 EVT XVT = X.getValueType();
10896 // setge X, C is canonicalized to setgt, so we do not need to match that
10897 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10898 // not require the 'not' op.
10899 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10900 // Invert and smear/shift the sign bit:
10901 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10902 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10903 SDLoc DL(N);
10904 unsigned ShCt = VT.getSizeInBits() - 1;
10905 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10906 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10907 SDValue NotX = DAG.getNOT(DL, X, VT);
10908 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10909 auto ShiftOpcode =
10910 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10911 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10912 }
10913 }
10914 return SDValue();
10915}
10916
10917SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10918 SDValue N0 = N->getOperand(0);
10919 if (N0.getOpcode() != ISD::SETCC)
10920 return SDValue();
10921
10922 SDValue N00 = N0.getOperand(0);
10923 SDValue N01 = N0.getOperand(1);
10924 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10925 EVT VT = N->getValueType(0);
10926 EVT N00VT = N00.getValueType();
10927 SDLoc DL(N);
10928
10929 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10930 // the same size as the compared operands. Try to optimize sext(setcc())
10931 // if this is the case.
10932 if (VT.isVector() && !LegalOperations &&
10933 TLI.getBooleanContents(N00VT) ==
10934 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10935 EVT SVT = getSetCCResultType(N00VT);
10936
10937 // If we already have the desired type, don't change it.
10938 if (SVT != N0.getValueType()) {
10939 // We know that the # elements of the result is the same as the
10940 // # elements of the compare (and the # elements of the compare result
10941 // for that matter). Check to see that they are the same size. If so,
10942 // we know that the element size of the sext'd result matches the
10943 // element size of the compare operands.
10944 if (VT.getSizeInBits() == SVT.getSizeInBits())
10945 return DAG.getSetCC(DL, VT, N00, N01, CC);
10946
10947 // If the desired elements are smaller or larger than the source
10948 // elements, we can use a matching integer vector type and then
10949 // truncate/sign extend.
10950 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10951 if (SVT == MatchingVecType) {
10952 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10953 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10954 }
10955 }
10956
10957 // Try to eliminate the sext of a setcc by zexting the compare operands.
10958 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10959 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10960 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10961 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10962 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10963
10964 // We have an unsupported narrow vector compare op that would be legal
10965 // if extended to the destination type. See if the compare operands
10966 // can be freely extended to the destination type.
10967 auto IsFreeToExtend = [&](SDValue V) {
10968 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10969 return true;
10970 // Match a simple, non-extended load that can be converted to a
10971 // legal {z/s}ext-load.
10972 // TODO: Allow widening of an existing {z/s}ext-load?
10973 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10974 ISD::isUNINDEXEDLoad(V.getNode()) &&
10975 cast<LoadSDNode>(V)->isSimple() &&
10976 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10977 return false;
10978
10979 // Non-chain users of this value must either be the setcc in this
10980 // sequence or extends that can be folded into the new {z/s}ext-load.
10981 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10982 UI != UE; ++UI) {
10983 // Skip uses of the chain and the setcc.
10984 SDNode *User = *UI;
10985 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
10986 continue;
10987 // Extra users must have exactly the same cast we are about to create.
10988 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
10989 // is enhanced similarly.
10990 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
10991 return false;
10992 }
10993 return true;
10994 };
10995
10996 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
10997 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
10998 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
10999 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11000 }
11001 }
11002 }
11003
11004 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11005 // Here, T can be 1 or -1, depending on the type of the setcc and
11006 // getBooleanContents().
11007 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11008
11009 // To determine the "true" side of the select, we need to know the high bit
11010 // of the value returned by the setcc if it evaluates to true.
11011 // If the type of the setcc is i1, then the true case of the select is just
11012 // sext(i1 1), that is, -1.
11013 // If the type of the setcc is larger (say, i8) then the value of the high
11014 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11015 // of the appropriate width.
11016 SDValue ExtTrueVal = (SetCCWidth == 1)
11017 ? DAG.getAllOnesConstant(DL, VT)
11018 : DAG.getBoolConstant(true, DL, VT, N00VT);
11019 SDValue Zero = DAG.getConstant(0, DL, VT);
11020 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11021 return SCC;
11022
11023 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11024 EVT SetCCVT = getSetCCResultType(N00VT);
11025 // Don't do this transform for i1 because there's a select transform
11026 // that would reverse it.
11027 // TODO: We should not do this transform at all without a target hook
11028 // because a sext is likely cheaper than a select?
11029 if (SetCCVT.getScalarSizeInBits() != 1 &&
11030 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11031 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11032 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11033 }
11034 }
11035
11036 return SDValue();
11037}
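// Editorial sketch (not part of DAGCombiner.cpp): the select form produced above is
// the scalar identity sext(setcc x, y, cc) == ((x cc y) ? -1 : 0) for an i1 setcc,
// i.e. sign-extending a boolean is the same as negating it as an integer:
#include <cstdint>
static int32_t sextOfCompare(int32_t x, int32_t y) {
  return (x < y) ? -1 : 0;              // == -(int32_t)(x < y)
}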
11038
11039SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11040 SDValue N0 = N->getOperand(0);
11041 EVT VT = N->getValueType(0);
11042 SDLoc DL(N);
11043
11044 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11045 return Res;
11046
11047 // fold (sext (sext x)) -> (sext x)
11048 // fold (sext (aext x)) -> (sext x)
11049 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11050 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11051
11052 if (N0.getOpcode() == ISD::TRUNCATE) {
11053 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11054 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11055 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11056 SDNode *oye = N0.getOperand(0).getNode();
11057 if (NarrowLoad.getNode() != N0.getNode()) {
11058 CombineTo(N0.getNode(), NarrowLoad);
11059 // CombineTo deleted the truncate, if needed, but not what's under it.
11060 AddToWorklist(oye);
11061 }
11062 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11063 }
11064
11065 // See if the value being truncated is already sign extended. If so, just
11066 // eliminate the trunc/sext pair.
11067 SDValue Op = N0.getOperand(0);
11068 unsigned OpBits = Op.getScalarValueSizeInBits();
11069 unsigned MidBits = N0.getScalarValueSizeInBits();
11070 unsigned DestBits = VT.getScalarSizeInBits();
11071 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11072
11073 if (OpBits == DestBits) {
11074 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11075 // bits, the trunc/sext pair is a no-op and Op can be returned directly.
11076 if (NumSignBits > DestBits-MidBits)
11077 return Op;
11078 } else if (OpBits < DestBits) {
11079 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11080 // bits, just sext from i32.
11081 if (NumSignBits > OpBits-MidBits)
11082 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11083 } else {
11084 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11085 // bits, just truncate to i32.
11086 if (NumSignBits > OpBits-MidBits)
11087 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11088 }
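// Editorial sketch (not part of DAGCombiner.cpp): the sign-bit counting above checks
// when the trunc/sext pair is a no-op, e.g. for Op:i32, Mid:i8, Dest:i32 the pair
// folds away exactly when Op already has more than 24 sign bits (fits in i8):
#include <cstdint>
static int32_t sextOfTrunc(int32_t x) {
  // Equal to x whenever x is in [-128, 127]; the fold above only fires when
  // ComputeNumSignBits proves that is always the case.
  return (int32_t)(int8_t)x;
}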
11089
11090 // fold (sext (truncate x)) -> (sextinreg x).
11091 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11092 N0.getValueType())) {
11093 if (OpBits < DestBits)
11094 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11095 else if (OpBits > DestBits)
11096 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11097 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11098 DAG.getValueType(N0.getValueType()));
11099 }
11100 }
11101
11102 // Try to simplify (sext (load x)).
11103 if (SDValue foldedExt =
11104 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11105 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11106 return foldedExt;
11107
11108 if (SDValue foldedExt =
11109 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11110 ISD::SIGN_EXTEND))
11111 return foldedExt;
11112
11113 // fold (sext (load x)) to multiple smaller sextloads.
11114 // Only on illegal but splittable vectors.
11115 if (SDValue ExtLoad = CombineExtLoad(N))
11116 return ExtLoad;
11117
11118 // Try to simplify (sext (sextload x)).
11119 if (SDValue foldedExt = tryToFoldExtOfExtload(
11120 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11121 return foldedExt;
11122
11123 // fold (sext (and/or/xor (load x), cst)) ->
11124 // (and/or/xor (sextload x), (sext cst))
11125 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11126 N0.getOpcode() == ISD::XOR) &&
11127 isa<LoadSDNode>(N0.getOperand(0)) &&
11128 N0.getOperand(1).getOpcode() == ISD::Constant &&
11129 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11130 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11131 EVT MemVT = LN00->getMemoryVT();
11132 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11133 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11134 SmallVector<SDNode*, 4> SetCCs;
11135 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11136 ISD::SIGN_EXTEND, SetCCs, TLI);
11137 if (DoXform) {
11138 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11139 LN00->getChain(), LN00->getBasePtr(),
11140 LN00->getMemoryVT(),
11141 LN00->getMemOperand());
11142 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11143 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11144 ExtLoad, DAG.getConstant(Mask, DL, VT));
11145 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11146 bool NoReplaceTruncAnd = !N0.hasOneUse();
11147 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11148 CombineTo(N, And);
11149 // If N0 has multiple uses, change other uses as well.
11150 if (NoReplaceTruncAnd) {
11151 SDValue TruncAnd =
11152 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11153 CombineTo(N0.getNode(), TruncAnd);
11154 }
11155 if (NoReplaceTrunc) {
11156 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11157 } else {
11158 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11159 LN00->getValueType(0), ExtLoad);
11160 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11161 }
11162 return SDValue(N,0); // Return N so it doesn't get rechecked!
11163 }
11164 }
11165 }
11166
11167 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11168 return V;
11169
11170 if (SDValue V = foldSextSetcc(N))
11171 return V;
11172
11173 // fold (sext x) -> (zext x) if the sign bit is known zero.
11174 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11175 DAG.SignBitIsZero(N0))
11176 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11177
11178 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11179 return NewVSel;
11180
11181 // Eliminate this sign extend by doing a negation in the destination type:
11182 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11183 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11184 isNullOrNullSplat(N0.getOperand(0)) &&
11185 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11186 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11187 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11188 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11189 }
11190 // Eliminate this sign extend by doing a decrement in the destination type:
11191 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11192 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11193 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11194 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11195 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11196 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11197 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11198 }
11199
11200 // fold sext (not i1 X) -> add (zext i1 X), -1
11201 // TODO: This could be extended to handle bool vectors.
11202 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11203 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11204 TLI.isOperationLegal(ISD::ADD, VT)))) {
11205 // If we can eliminate the 'not', the sext form should be better
11206 if (SDValue NewXor = visitXOR(N0.getNode())) {
11207 // Returning N0 is a form of in-visit replacement that may have
11208 // invalidated N0.
11209 if (NewXor.getNode() == N0.getNode()) {
11210 // Return SDValue here as the xor should have already been replaced in
11211 // this sext.
11212 return SDValue();
11213 } else {
11214 // Return a new sext with the new xor.
11215 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11216 }
11217 }
11218
11219 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11220 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11221 }
11222
11223 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11224 return Res;
11225
11226 return SDValue();
11227}
11228
11229// isTruncateOf - If N is a truncate of some other value, return true and record
11230// the value being truncated in Op and which of Op's bits are zero/one in Known.
11231// This function computes KnownBits to avoid a duplicated call to
11232// computeKnownBits in the caller.
11233static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11234 KnownBits &Known) {
11235 if (N->getOpcode() == ISD::TRUNCATE) {
11236 Op = N->getOperand(0);
11237 Known = DAG.computeKnownBits(Op);
11238 return true;
11239 }
11240
11241 if (N.getOpcode() != ISD::SETCC ||
11242 N.getValueType().getScalarType() != MVT::i1 ||
11243 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11244 return false;
11245
11246 SDValue Op0 = N->getOperand(0);
11247 SDValue Op1 = N->getOperand(1);
11248 assert(Op0.getValueType() == Op1.getValueType());
11249
11250 if (isNullOrNullSplat(Op0))
11251 Op = Op1;
11252 else if (isNullOrNullSplat(Op1))
11253 Op = Op0;
11254 else
11255 return false;
11256
11257 Known = DAG.computeKnownBits(Op);
11258
11259 return (Known.Zero | 1).isAllOnesValue();
11260}
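// Editorial sketch (not part of DAGCombiner.cpp): the setcc case above treats
// (x != 0) as a truncate to i1 once known bits prove that every bit of x other
// than bit 0 is zero, because the compare result then equals the low bit of x:
#include <cstdint>
static bool lowBitAsBool(uint32_t x) {
  // Precondition established by the caller via known bits: (x & ~1u) == 0.
  return x != 0;                        // == (x & 1)
}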
11261
11262/// Given an extending node with a pop-count operand, if the target does not
11263/// support a pop-count in the narrow source type but does support it in the
11264/// destination type, widen the pop-count to the destination type.
11265static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11266 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11267 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11268
11269 SDValue CtPop = Extend->getOperand(0);
11270 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11271 return SDValue();
11272
11273 EVT VT = Extend->getValueType(0);
11274 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11275 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11276 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11277 return SDValue();
11278
11279 // zext (ctpop X) --> ctpop (zext X)
11280 SDLoc DL(Extend);
11281 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11282 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11283}
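// Editorial sketch (not part of DAGCombiner.cpp): zext (ctpop X) --> ctpop (zext X)
// is safe because zero extension only adds zero bits, which never change the
// population count. A C++20 check using std::popcount:
#include <bit>
#include <cstdint>
static int popcountWidened(uint16_t x) {
  return std::popcount(static_cast<uint64_t>(x));  // same value as std::popcount(x)
}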
11284
11285SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11286 SDValue N0 = N->getOperand(0);
11287 EVT VT = N->getValueType(0);
11288
11289 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11290 return Res;
11291
11292 // fold (zext (zext x)) -> (zext x)
11293 // fold (zext (aext x)) -> (zext x)
11294 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11295 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11296 N0.getOperand(0));
11297
11298 // fold (zext (truncate x)) -> (zext x) or
11299 // (zext (truncate x)) -> (truncate x)
11300 // This is valid when the truncated bits of x are already zero.
11301 SDValue Op;
11302 KnownBits Known;
11303 if (isTruncateOf(DAG, N0, Op, Known)) {
11304 APInt TruncatedBits =
11305 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11306 APInt(Op.getScalarValueSizeInBits(), 0) :
11307 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11308 N0.getScalarValueSizeInBits(),
11309 std::min(Op.getScalarValueSizeInBits(),
11310 VT.getScalarSizeInBits()));
11311 if (TruncatedBits.isSubsetOf(Known.Zero))
11312 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11313 }
11314
11315 // fold (zext (truncate x)) -> (and x, mask)
11316 if (N0.getOpcode() == ISD::TRUNCATE) {
11317 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11318 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11319 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11320 SDNode *oye = N0.getOperand(0).getNode();
11321 if (NarrowLoad.getNode() != N0.getNode()) {
11322 CombineTo(N0.getNode(), NarrowLoad);
11323 // CombineTo deleted the truncate, if needed, but not what's under it.
11324 AddToWorklist(oye);
11325 }
11326 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11327 }
11328
11329 EVT SrcVT = N0.getOperand(0).getValueType();
11330 EVT MinVT = N0.getValueType();
11331
11332 // Try to mask before the extension to avoid having to generate a larger mask,
11333 // possibly over several sub-vectors.
11334 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11335 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11336 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11337 SDValue Op = N0.getOperand(0);
11338 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11339 AddToWorklist(Op.getNode());
11340 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11341 // Transfer the debug info; the new node is equivalent to N0.
11342 DAG.transferDbgValues(N0, ZExtOrTrunc);
11343 return ZExtOrTrunc;
11344 }
11345 }
11346
11347 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11348 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11349 AddToWorklist(Op.getNode());
11350 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11351 // We may safely transfer the debug info describing the truncate node over
11352 // to the equivalent and operation.
11353 DAG.transferDbgValues(N0, And);
11354 return And;
11355 }
11356 }
11357
11358 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11359 // if either of the casts is not free.
11360 if (N0.getOpcode() == ISD::AND &&
11361 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11362 N0.getOperand(1).getOpcode() == ISD::Constant &&
11363 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11364 N0.getValueType()) ||
11365 !TLI.isZExtFree(N0.getValueType(), VT))) {
11366 SDValue X = N0.getOperand(0).getOperand(0);
11367 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11368 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11369 SDLoc DL(N);
11370 return DAG.getNode(ISD::AND, DL, VT,
11371 X, DAG.getConstant(Mask, DL, VT));
11372 }
11373
11374 // Try to simplify (zext (load x)).
11375 if (SDValue foldedExt =
11376 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11377 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11378 return foldedExt;
11379
11380 if (SDValue foldedExt =
11381 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11382 ISD::ZERO_EXTEND))
11383 return foldedExt;
11384
11385 // fold (zext (load x)) to multiple smaller zextloads.
11386 // Only on illegal but splittable vectors.
11387 if (SDValue ExtLoad = CombineExtLoad(N))
11388 return ExtLoad;
11389
11390 // fold (zext (and/or/xor (load x), cst)) ->
11391 // (and/or/xor (zextload x), (zext cst))
11392 // Unless (and (load x) cst) will match as a zextload already and has
11393 // additional users.
11394 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11395 N0.getOpcode() == ISD::XOR) &&
11396 isa<LoadSDNode>(N0.getOperand(0)) &&
11397 N0.getOperand(1).getOpcode() == ISD::Constant &&
11398 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11399 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11400 EVT MemVT = LN00->getMemoryVT();
11401 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11402 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11403 bool DoXform = true;
11404 SmallVector<SDNode*, 4> SetCCs;
11405 if (!N0.hasOneUse()) {
11406 if (N0.getOpcode() == ISD::AND) {
11407 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11408 EVT LoadResultTy = AndC->getValueType(0);
11409 EVT ExtVT;
11410 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11411 DoXform = false;
11412 }
11413 }
11414 if (DoXform)
11415 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11416 ISD::ZERO_EXTEND, SetCCs, TLI);
11417 if (DoXform) {
11418 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11419 LN00->getChain(), LN00->getBasePtr(),
11420 LN00->getMemoryVT(),
11421 LN00->getMemOperand());
11422 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11423 SDLoc DL(N);
11424 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11425 ExtLoad, DAG.getConstant(Mask, DL, VT));
11426 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11427 bool NoReplaceTruncAnd = !N0.hasOneUse();
11428 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11429 CombineTo(N, And);
11430 // If N0 has multiple uses, change other uses as well.
11431 if (NoReplaceTruncAnd) {
11432 SDValue TruncAnd =
11433 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11434 CombineTo(N0.getNode(), TruncAnd);
11435 }
11436 if (NoReplaceTrunc) {
11437 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11438 } else {
11439 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11440 LN00->getValueType(0), ExtLoad);
11441 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11442 }
11443 return SDValue(N,0); // Return N so it doesn't get rechecked!
11444 }
11445 }
11446 }
11447
11448 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11449 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11450 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11451 return ZExtLoad;
11452
11453 // Try to simplify (zext (zextload x)).
11454 if (SDValue foldedExt = tryToFoldExtOfExtload(
11455 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11456 return foldedExt;
11457
11458 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11459 return V;
11460
11461 if (N0.getOpcode() == ISD::SETCC) {
11462 // Only do this before legalize for now.
11463 if (!LegalOperations && VT.isVector() &&
11464 N0.getValueType().getVectorElementType() == MVT::i1) {
11465 EVT N00VT = N0.getOperand(0).getValueType();
11466 if (getSetCCResultType(N00VT) == N0.getValueType())
11467 return SDValue();
11468
11469 // We know that the # elements of the result is the same as the #
11470 // elements of the compare (and the # elements of the compare result for
11471 // that matter). Check to see that they are the same size. If so, we know
11472 // that the element size of the zext'd result matches the element size of
11473 // the compare operands.
11474 SDLoc DL(N);
11475 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11476 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11477 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11478 N0.getOperand(1), N0.getOperand(2));
11479 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11480 }
11481
11482 // If the desired elements are smaller or larger than the source
11483 // elements we can use a matching integer vector type and then
11484 // truncate/any extend followed by zext_in_reg.
11485 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11486 SDValue VsetCC =
11487 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11488 N0.getOperand(1), N0.getOperand(2));
11489 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11490 N0.getValueType());
11491 }
11492
11493 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11494 SDLoc DL(N);
11495 EVT N0VT = N0.getValueType();
11496 EVT N00VT = N0.getOperand(0).getValueType();
11497 if (SDValue SCC = SimplifySelectCC(
11498 DL, N0.getOperand(0), N0.getOperand(1),
11499 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11500 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11501 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11502 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11503 }
11504
11505 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11506 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11507 isa<ConstantSDNode>(N0.getOperand(1)) &&
11508 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11509 N0.hasOneUse()) {
11510 SDValue ShAmt = N0.getOperand(1);
11511 if (N0.getOpcode() == ISD::SHL) {
11512 SDValue InnerZExt = N0.getOperand(0);
11513 // If the original shl may be shifting out bits, do not perform this
11514 // transformation.
11515 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11516 InnerZExt.getOperand(0).getValueSizeInBits();
11517 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11518 return SDValue();
11519 }
11520
11521 SDLoc DL(N);
11522
11523 // Ensure that the shift amount is wide enough for the shifted value.
11524 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11525 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11526
11527 return DAG.getNode(N0.getOpcode(), DL, VT,
11528 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11529 ShAmt);
11530 }
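// Illustrative note (editorial sketch): for instance,
//   (zext i64 (shl (zext i8 %x to i32), 4))
// becomes (shl (zext i8 %x to i64), 4), since the inner zext leaves at least
// 24 known-zero high bits in the i32 value and the shift amount (4) cannot
// shift any set bits out.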
11531
11532 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11533 return NewVSel;
11534
11535 if (SDValue NewCtPop = widenCtPop(N, DAG))
11536 return NewCtPop;
11537
11538 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11539 return Res;
11540
11541 return SDValue();
11542}
11543
11544SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11545 SDValue N0 = N->getOperand(0);
11546 EVT VT = N->getValueType(0);
11547
11548 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11549 return Res;
11550
11551 // fold (aext (aext x)) -> (aext x)
11552 // fold (aext (zext x)) -> (zext x)
11553 // fold (aext (sext x)) -> (sext x)
11554 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11555 N0.getOpcode() == ISD::ZERO_EXTEND ||
11556 N0.getOpcode() == ISD::SIGN_EXTEND)
11557 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11558
11559 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11560 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11561 if (N0.getOpcode() == ISD::TRUNCATE) {
11562 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11563 SDNode *oye = N0.getOperand(0).getNode();
11564 if (NarrowLoad.getNode() != N0.getNode()) {
11565 CombineTo(N0.getNode(), NarrowLoad);
11566 // CombineTo deleted the truncate, if needed, but not what's under it.
11567 AddToWorklist(oye);
11568 }
11569 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11570 }
11571 }
11572
11573 // fold (aext (truncate x))
11574 if (N0.getOpcode() == ISD::TRUNCATE)
11575 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11576
11577 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11578 // if the trunc is not free.
11579 if (N0.getOpcode() == ISD::AND &&
11580 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11581 N0.getOperand(1).getOpcode() == ISD::Constant &&
11582 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11583 N0.getValueType())) {
11584 SDLoc DL(N);
11585 SDValue X = N0.getOperand(0).getOperand(0);
11586 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11587 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11588 return DAG.getNode(ISD::AND, DL, VT,
11589 X, DAG.getConstant(Mask, DL, VT));
11590 }
11591
11592 // fold (aext (load x)) -> (aext (truncate (extload x)))
11593 // None of the supported targets knows how to perform load and any_ext
11594 // on vectors in one instruction, so attempt to fold to zext instead.
11595 if (VT.isVector()) {
11596 // Try to simplify (zext (load x)).
11597 if (SDValue foldedExt =
11598 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11599 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11600 return foldedExt;
11601 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11602 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11603 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11604 bool DoXform = true;
11605 SmallVector<SDNode *, 4> SetCCs;
11606 if (!N0.hasOneUse())
11607 DoXform =
11608 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11609 if (DoXform) {
11610 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11611 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11612 LN0->getChain(), LN0->getBasePtr(),
11613 N0.getValueType(), LN0->getMemOperand());
11614 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11615 // If the load value is used only by N, replace it via CombineTo N.
11616 bool NoReplaceTrunc = N0.hasOneUse();
11617 CombineTo(N, ExtLoad);
11618 if (NoReplaceTrunc) {
11619 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11620 recursivelyDeleteUnusedNodes(LN0);
11621 } else {
11622 SDValue Trunc =
11623 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11624 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11625 }
11626 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11627 }
11628 }
11629
11630 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11631 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11632 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11633 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11634 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11635 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11636 ISD::LoadExtType ExtType = LN0->getExtensionType();
11637 EVT MemVT = LN0->getMemoryVT();
11638 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11639 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11640 VT, LN0->getChain(), LN0->getBasePtr(),
11641 MemVT, LN0->getMemOperand());
11642 CombineTo(N, ExtLoad);
11643 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11644 recursivelyDeleteUnusedNodes(LN0);
11645 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11646 }
11647 }
11648
11649 if (N0.getOpcode() == ISD::SETCC) {
11650 // For vectors:
11651 // aext(setcc) -> vsetcc
11652 // aext(setcc) -> truncate(vsetcc)
11653 // aext(setcc) -> aext(vsetcc)
11654 // Only do this before legalize for now.
11655 if (VT.isVector() && !LegalOperations) {
11656 EVT N00VT = N0.getOperand(0).getValueType();
11657 if (getSetCCResultType(N00VT) == N0.getValueType())
11658 return SDValue();
11659
11660 // We know that the # elements of the result is the same as the
11661 // # elements of the compare (and the # elements of the compare result
11662 // for that matter). Check to see that they are the same size. If so,
11663 // we know that the element size of the extended result matches the
11664 // element size of the compare operands.
11665 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11666 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11667 N0.getOperand(1),
11668 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11669
11670 // If the desired elements are smaller or larger than the source
11671 // elements we can use a matching integer vector type and then
11672 // truncate/any extend
11673 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11674 SDValue VsetCC =
11675 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11676 N0.getOperand(1),
11677 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11678 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11679 }
11680
11681 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11682 SDLoc DL(N);
11683 if (SDValue SCC = SimplifySelectCC(
11684 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11685 DAG.getConstant(0, DL, VT),
11686 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11687 return SCC;
11688 }
11689
11690 if (SDValue NewCtPop = widenCtPop(N, DAG))
11691 return NewCtPop;
11692
11693 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11694 return Res;
11695
11696 return SDValue();
11697}
11698
11699SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11700 unsigned Opcode = N->getOpcode();
11701 SDValue N0 = N->getOperand(0);
11702 SDValue N1 = N->getOperand(1);
11703 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11704
11705 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11706 if (N0.getOpcode() == Opcode &&
11707 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11708 return N0;
11709
11710 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11711 N0.getOperand(0).getOpcode() == Opcode) {
11712 // We have an assert, truncate, assert sandwich. Make one stronger assert
11713 // by asserting on the smallest asserted type to the larger source type.
11714 // This eliminates the later assert:
11715 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11716 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11717 SDValue BigA = N0.getOperand(0);
11718 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11719 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11720 "Asserting zero/sign-extended bits to a type larger than the "
11721 "truncated destination does not provide information");
11722
11723 SDLoc DL(N);
11724 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11725 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11726 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11727 BigA.getOperand(0), MinAssertVTVal);
11728 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11729 }
11730
11731 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11732 // than X, just move the AssertZext in front of the truncate and drop the
11733 // AssertSExt.
11734 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11735 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11736 Opcode == ISD::AssertZext) {
11737 SDValue BigA = N0.getOperand(0);
11738 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11739 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11740 "Asserting zero/sign-extended bits to a type larger than the "
11741 "truncated destination does not provide information");
11742
11743 if (AssertVT.bitsLT(BigA_AssertVT)) {
11744 SDLoc DL(N);
11745 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11746 BigA.getOperand(0), N1);
11747 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11748 }
11749 }
11750
11751 return SDValue();
11752}
11753
11754SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11755 SDLoc DL(N);
11756
11757 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11758 SDValue N0 = N->getOperand(0);
11759
11760 // Fold (assertalign (assertalign x, AL0), AL1) ->
11761 // (assertalign x, max(AL0, AL1))
11762 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11763 return DAG.getAssertAlign(DL, N0.getOperand(0),
11764 std::max(AL, AAN->getAlign()));
11765
11766 // In rare cases, there are trivial arithmetic ops in source operands. Sink
11767 // this assert down to source operands so that those arithmetic ops could be
11768 // exposed to the DAG combining.
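// Illustrative note (editorial, hypothetical operands): given
//   (assertalign (add %p, 32), align 16)
// the constant 32 already provides at least Log2(16) = 4 trailing zero bits,
// so this becomes (add (assertalign %p, align 16), 32).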
11769 switch (N0.getOpcode()) {
11770 default:
11771 break;
11772 case ISD::ADD:
11773 case ISD::SUB: {
11774 unsigned AlignShift = Log2(AL);
11775 SDValue LHS = N0.getOperand(0);
11776 SDValue RHS = N0.getOperand(1);
11777 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11778 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11779 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11780 if (LHSAlignShift < AlignShift)
11781 LHS = DAG.getAssertAlign(DL, LHS, AL);
11782 if (RHSAlignShift < AlignShift)
11783 RHS = DAG.getAssertAlign(DL, RHS, AL);
11784 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11785 }
11786 break;
11787 }
11788 }
11789
11790 return SDValue();
11791}
11792
11793/// If the result of a wider load is shifted right by N bits and then
11794/// truncated to a narrower type, where N is a multiple of the number of bits
11795/// of the narrower type, transform it to a narrower load from the original
11796/// address plus N / 8 bytes. Also narrow the load if the result is masked
11797/// with an AND to effectively produce a smaller type. If the result is to be
11798/// extended, also fold the extension to form an extending load.
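/// For example (editorial illustration, hypothetical pointer %p): on a
/// little-endian target,
///   (trunc i16 (srl (load i32 %p), 16))
/// can be rewritten as a narrower (load i16 %p+2), since the shift amount is
/// a multiple of the narrow type's width.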
11799SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11800 unsigned Opc = N->getOpcode();
11801
11802 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11803 SDValue N0 = N->getOperand(0);
11804 EVT VT = N->getValueType(0);
11805 EVT ExtVT = VT;
11806
11807 // This transformation isn't valid for vector loads.
11808 if (VT.isVector())
11809 return SDValue();
11810
11811 unsigned ShAmt = 0;
11812 bool HasShiftedOffset = false;
11813 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11814 // extended to VT.
11815 if (Opc == ISD::SIGN_EXTEND_INREG) {
11816 ExtType = ISD::SEXTLOAD;
11817 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11818 } else if (Opc == ISD::SRL) {
11819 // Another special-case: SRL is basically zero-extending a narrower value,
11820 // or it may be shifting a higher subword, half or byte into the lowest
11821 // bits.
11822 ExtType = ISD::ZEXTLOAD;
11823 N0 = SDValue(N, 0);
11824
11825 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11826 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11827 if (!N01 || !LN0)
11828 return SDValue();
11829
11830 uint64_t ShiftAmt = N01->getZExtValue();
11831 uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11832 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11833 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11834 else
11835 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11836 VT.getScalarSizeInBits() - ShiftAmt);
11837 } else if (Opc == ISD::AND) {
11838 // An AND with a constant mask is the same as a truncate + zero-extend.
11839 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11840 if (!AndC)
11841 return SDValue();
11842
11843 const APInt &Mask = AndC->getAPIntValue();
11844 unsigned ActiveBits = 0;
11845 if (Mask.isMask()) {
11846 ActiveBits = Mask.countTrailingOnes();
11847 } else if (Mask.isShiftedMask()) {
11848 ShAmt = Mask.countTrailingZeros();
11849 APInt ShiftedMask = Mask.lshr(ShAmt);
11850 ActiveBits = ShiftedMask.countTrailingOnes();
11851 HasShiftedOffset = true;
11852 } else
11853 return SDValue();
11854
11855 ExtType = ISD::ZEXTLOAD;
11856 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11857 }
11858
11859 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11860 SDValue SRL = N0;
11861 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11862 ShAmt = ConstShift->getZExtValue();
11863 unsigned EVTBits = ExtVT.getScalarSizeInBits();
11864 // Is the shift amount a multiple of size of VT?
11865 if ((ShAmt & (EVTBits-1)) == 0) {
11866 N0 = N0.getOperand(0);
11867 // Is the load width a multiple of size of VT?
11868 if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11869 return SDValue();
11870 }
11871
11872 // At this point, we must have a load or else we can't do the transform.
11873 auto *LN0 = dyn_cast<LoadSDNode>(N0);
11874 if (!LN0) return SDValue();
11875
11876 // Because a SRL must be assumed to *need* to zero-extend the high bits
11877 // (as opposed to anyext the high bits), we can't combine the zextload
11878 // lowering of SRL and an sextload.
11879 if (LN0->getExtensionType() == ISD::SEXTLOAD)
11880 return SDValue();
11881
11882 // If the shift amount is larger than the input type then we're not
11883 // accessing any of the loaded bytes. If the load was a zextload/extload
11884 // then the result of the shift+trunc is zero/undef (handled elsewhere).
11885 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11886 return SDValue();
11887
11888 // If the SRL is only used by a masking AND, we may be able to adjust
11889 // the ExtVT to make the AND redundant.
11890 SDNode *Mask = *(SRL->use_begin());
11891 if (Mask->getOpcode() == ISD::AND &&
11892 isa<ConstantSDNode>(Mask->getOperand(1))) {
11893 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11894 if (ShiftMask.isMask()) {
11895 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11896 ShiftMask.countTrailingOnes());
11897 // If the mask is smaller, recompute the type.
11898 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11899 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11900 ExtVT = MaskedVT;
11901 }
11902 }
11903 }
11904 }
11905
11906 // If the load is shifted left (and the result isn't shifted back right),
11907 // we can fold the truncate through the shift.
11908 unsigned ShLeftAmt = 0;
11909 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11910 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11911 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11912 ShLeftAmt = N01->getZExtValue();
11913 N0 = N0.getOperand(0);
11914 }
11915 }
11916
11917 // If we haven't found a load, we can't narrow it.
11918 if (!isa<LoadSDNode>(N0))
11919 return SDValue();
11920
11921 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11922 // Reducing the width of a volatile load is illegal. For atomics, we may be
11923 // able to reduce the width provided we never widen again. (see D66309)
11924 if (!LN0->isSimple() ||
11925 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11926 return SDValue();
11927
11928 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11929 unsigned LVTStoreBits =
11930 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11931 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11932 return LVTStoreBits - EVTStoreBits - ShAmt;
11933 };
11934
11935 // For big endian targets, we need to adjust the offset to the pointer to
11936 // load the correct bytes.
11937 if (DAG.getDataLayout().isBigEndian())
11938 ShAmt = AdjustBigEndianShift(ShAmt);
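// Illustrative note (editorial): narrowing an i32 load to its low i8
// (ShAmt == 0) on a big-endian target must read the byte at offset 3, so the
// shift becomes 32 - 8 - 0 = 24 and PtrOff below ends up as 3.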
11939
11940 uint64_t PtrOff = ShAmt / 8;
11941 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11942 SDLoc DL(LN0);
11943 // The original load itself didn't wrap, so an offset within it doesn't.
11944 SDNodeFlags Flags;
11945 Flags.setNoUnsignedWrap(true);
11946 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11947 TypeSize::Fixed(PtrOff), DL, Flags);
11948 AddToWorklist(NewPtr.getNode());
11949
11950 SDValue Load;
11951 if (ExtType == ISD::NON_EXTLOAD)
11952 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11953 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11954 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11955 else
11956 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11957 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11958 NewAlign, LN0->getMemOperand()->getFlags(),
11959 LN0->getAAInfo());
11960
11961 // Replace the old load's chain with the new load's chain.
11962 WorklistRemover DeadNodes(*this);
11963 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11964
11965 // Shift the result left, if we've swallowed a left shift.
11966 SDValue Result = Load;
11967 if (ShLeftAmt != 0) {
11968 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11969 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11970 ShImmTy = VT;
11971 // If the shift amount is as large as the result size (but, presumably,
11972 // no larger than the source) then the useful bits of the result are
11973 // zero; we can't simply return the shortened shift, because the result
11974 // of that operation is undefined.
11975 if (ShLeftAmt >= VT.getScalarSizeInBits())
11976 Result = DAG.getConstant(0, DL, VT);
11977 else
11978 Result = DAG.getNode(ISD::SHL, DL, VT,
11979 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11980 }
11981
11982 if (HasShiftedOffset) {
11983 // Recalculate the shift amount after it has been altered to calculate
11984 // the offset.
11985 if (DAG.getDataLayout().isBigEndian())
11986 ShAmt = AdjustBigEndianShift(ShAmt);
11987
11988 // We're using a shifted mask, so the load now has an offset. This means
11989 // that the data has been loaded into lower bytes than it would have been
11990 // before, so we need to shl the loaded data into the correct position in
11991 // the register.
11992 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11993 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
11994 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11995 }
11996
11997 // Return the new loaded value.
11998 return Result;
11999}
12000
12001SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12002 SDValue N0 = N->getOperand(0);
12003 SDValue N1 = N->getOperand(1);
12004 EVT VT = N->getValueType(0);
12005 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12006 unsigned VTBits = VT.getScalarSizeInBits();
12007 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12008
12009 // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
12010 if (N0.isUndef())
12011 return DAG.getConstant(0, SDLoc(N), VT);
12012
12013 // fold (sext_in_reg c1) -> c1
12014 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12015 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12016
12017 // If the input is already sign extended, just drop the extension.
12018 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
12019 return N0;
12020
12021 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12022 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12023 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12024 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12025 N1);
12026
12027 // fold (sext_in_reg (sext x)) -> (sext x)
12028 // fold (sext_in_reg (aext x)) -> (sext x)
12029 // if x is small enough or if we know that x has more than 1 sign bit and the
12030 // sign_extend_inreg is extending from one of them.
12031 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12032 SDValue N00 = N0.getOperand(0);
12033 unsigned N00Bits = N00.getScalarValueSizeInBits();
12034 if ((N00Bits <= ExtVTBits ||
12035 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
12036 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12037 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12038 }
12039
12040 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12041 // if x is small enough or if we know that x has more than 1 sign bit and the
12042 // sign_extend_inreg is extending from one of them.
12043 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12044 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12045 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12046 SDValue N00 = N0.getOperand(0);
12047 unsigned N00Bits = N00.getScalarValueSizeInBits();
12048 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12049 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12050 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12051 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12052 if ((N00Bits == ExtVTBits ||
12053 (!IsZext && (N00Bits < ExtVTBits ||
12054 (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
12055 ExtVTBits))) &&
12056 (!LegalOperations ||
12057 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12058 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12059 }
12060
12061 // fold (sext_in_reg (zext x)) -> (sext x)
12062 // iff we are extending the source sign bit.
12063 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12064 SDValue N00 = N0.getOperand(0);
12065 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12066 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12067 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12068 }
12069
12070 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12071 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12072 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12073
12074 // fold operands of sext_in_reg based on knowledge that the top bits are not
12075 // demanded.
12076 if (SimplifyDemandedBits(SDValue(N, 0)))
12077 return SDValue(N, 0);
12078
12079 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12080 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12081 if (SDValue NarrowLoad = ReduceLoadWidth(N))
12082 return NarrowLoad;
12083
12084 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12085 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12086 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12087 if (N0.getOpcode() == ISD::SRL) {
12088 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12089 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12090 // We can turn this into an SRA iff the input to the SRL is already sign
12091 // extended enough.
12092 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12093 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12094 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12095 N0.getOperand(1));
12096 }
12097 }
12098
12099 // fold (sext_inreg (extload x)) -> (sextload x)
12100 // If sextload is not supported by target, we can only do the combine when
12101 // load has one use. Doing otherwise can block folding the extload with other
12102 // extends that the target does support.
12103 if (ISD::isEXTLoad(N0.getNode()) &&
12104 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12105 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12106 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12107 N0.hasOneUse()) ||
12108 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12109 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12110 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12111 LN0->getChain(),
12112 LN0->getBasePtr(), ExtVT,
12113 LN0->getMemOperand());
12114 CombineTo(N, ExtLoad);
12115 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12116 AddToWorklist(ExtLoad.getNode());
12117 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12118 }
12119
12120 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12121 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12122 N0.hasOneUse() &&
12123 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12124 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12125 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12126 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12127 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12128 LN0->getChain(),
12129 LN0->getBasePtr(), ExtVT,
12130 LN0->getMemOperand());
12131 CombineTo(N, ExtLoad);
12132 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12133 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12134 }
12135
12136 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12137 // ignore it if the masked load is already sign extended
12138 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12139 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12140 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12141 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12142 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12143 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12144 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12145 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12146 CombineTo(N, ExtMaskedLoad);
12147 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12148 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12149 }
12150 }
12151
12152 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12153 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12154 if (SDValue(GN0, 0).hasOneUse() &&
12155 ExtVT == GN0->getMemoryVT() &&
12156 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12157 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12158 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12159
12160 SDValue ExtLoad = DAG.getMaskedGather(
12161 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12162 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12163
12164 CombineTo(N, ExtLoad);
12165 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12166 AddToWorklist(ExtLoad.getNode());
12167 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12168 }
12169 }
12170
12171 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12172 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12173 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12174 N0.getOperand(1), false))
12175 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12176 }
12177
12178 return SDValue();
12179}
12180
12181SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12182 SDValue N0 = N->getOperand(0);
12183 EVT VT = N->getValueType(0);
12184
12185 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12186 if (N0.isUndef())
12187 return DAG.getConstant(0, SDLoc(N), VT);
12188
12189 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12190 return Res;
12191
12192 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12193 return SDValue(N, 0);
12194
12195 return SDValue();
12196}
12197
12198SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12199 SDValue N0 = N->getOperand(0);
12200 EVT VT = N->getValueType(0);
12201 EVT SrcVT = N0.getValueType();
12202 bool isLE = DAG.getDataLayout().isLittleEndian();
12203
12204 // noop truncate
12205 if (SrcVT == VT)
12206 return N0;
12207
12208 // fold (truncate (truncate x)) -> (truncate x)
12209 if (N0.getOpcode() == ISD::TRUNCATE)
12210 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12211
12212 // fold (truncate c1) -> c1
12213 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12214 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12215 if (C.getNode() != N)
12216 return C;
12217 }
12218
12219 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12220 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12221 N0.getOpcode() == ISD::SIGN_EXTEND ||
12222 N0.getOpcode() == ISD::ANY_EXTEND) {
12223 // if the source is smaller than the dest, we still need an extend.
12224 if (N0.getOperand(0).getValueType().bitsLT(VT))
12225 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12226 // if the source is larger than the dest, then we just need the truncate.
12227 if (N0.getOperand(0).getValueType().bitsGT(VT))
12228 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12229 // if the source and dest are the same type, we can drop both the extend
12230 // and the truncate.
12231 return N0.getOperand(0);
12232 }
12233
12234 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12235 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12236 return SDValue();
12237
12238 // Fold extract-and-trunc into a narrow extract. For example:
12239 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12240 // i32 y = TRUNCATE(i64 x)
12241 // -- becomes --
12242 // v16i8 b = BITCAST (v2i64 val)
12243 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12244 //
12245 // Note: We only run this optimization after type legalization (which often
12246 // creates this pattern) and before operation legalization after which
12247 // we need to be more careful about the vector instructions that we generate.
12248 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12249 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12250 EVT VecTy = N0.getOperand(0).getValueType();
12251 EVT ExTy = N0.getValueType();
12252 EVT TrTy = N->getValueType(0);
12253
12254 auto EltCnt = VecTy.getVectorElementCount();
12255 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12256 auto NewEltCnt = EltCnt * SizeRatio;
12257
12258 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12259 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12260
12261 SDValue EltNo = N0->getOperand(1);
12262 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12263 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12264 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12265
12266 SDLoc DL(N);
12267 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12268 DAG.getBitcast(NVT, N0.getOperand(0)),
12269 DAG.getVectorIdxConstant(Index, DL));
12270 }
12271 }
12272
12273 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12274 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12275 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12276 TLI.isTruncateFree(SrcVT, VT)) {
12277 SDLoc SL(N0);
12278 SDValue Cond = N0.getOperand(0);
12279 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12280 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12281 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12282 }
12283 }
12284
12285 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12286 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12287 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12288 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12289 SDValue Amt = N0.getOperand(1);
12290 KnownBits Known = DAG.computeKnownBits(Amt);
12291 unsigned Size = VT.getScalarSizeInBits();
12292 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12293 SDLoc SL(N);
12294 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12295
12296 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12297 if (AmtVT != Amt.getValueType()) {
12298 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12299 AddToWorklist(Amt.getNode());
12300 }
12301 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12302 }
12303 }
12304
12305 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12306 return V;
12307
12308 // Attempt to pre-truncate BUILD_VECTOR sources.
12309 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12310 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12311 // Avoid creating illegal types if running after type legalizer.
12312 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12313 SDLoc DL(N);
12314 EVT SVT = VT.getScalarType();
12315 SmallVector<SDValue, 8> TruncOps;
12316 for (const SDValue &Op : N0->op_values()) {
12317 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12318 TruncOps.push_back(TruncOp);
12319 }
12320 return DAG.getBuildVector(VT, DL, TruncOps);
12321 }
12322
12323 // Fold a series of buildvector, bitcast, and truncate if possible.
12324 // For example fold
12325 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12326 // (2xi32 (buildvector x, y)).
12327 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12328 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12329 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12330 N0.getOperand(0).hasOneUse()) {
12331 SDValue BuildVect = N0.getOperand(0);
12332 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12333 EVT TruncVecEltTy = VT.getVectorElementType();
12334
12335 // Check that the element types match.
12336 if (BuildVectEltTy == TruncVecEltTy) {
12337 // Now we only need to compute the offset of the truncated elements.
12338 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12339 unsigned TruncVecNumElts = VT.getVectorNumElements();
12340 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12341
12342 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12343 "Invalid number of elements");
12344
12345 SmallVector<SDValue, 8> Opnds;
12346 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12347 Opnds.push_back(BuildVect.getOperand(i));
12348
12349 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12350 }
12351 }
12352
12353 // See if we can simplify the input to this truncate through knowledge that
12354 // only the low bits are being used.
12355 // For example "trunc (or (shl x, 8), y)" -> trunc y
12356 // Currently we only perform this optimization on scalars because vectors
12357 // may have different active low bits.
12358 if (!VT.isVector()) {
12359 APInt Mask =
12360 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12361 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12362 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12363 }
12364
12365 // fold (truncate (load x)) -> (smaller load x)
12366 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12367 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12368 if (SDValue Reduced = ReduceLoadWidth(N))
12369 return Reduced;
12370
12371 // Handle the case where the load remains an extending load even
12372 // after truncation.
12373 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12374 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12375 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12376 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12377 VT, LN0->getChain(), LN0->getBasePtr(),
12378 LN0->getMemoryVT(),
12379 LN0->getMemOperand());
12380 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12381 return NewLoad;
12382 }
12383 }
12384 }
12385
12386 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12387 // where ... are all 'undef'.
12388 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12389 SmallVector<EVT, 8> VTs;
12390 SDValue V;
12391 unsigned Idx = 0;
12392 unsigned NumDefs = 0;
12393
12394 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12395 SDValue X = N0.getOperand(i);
12396 if (!X.isUndef()) {
12397 V = X;
12398 Idx = i;
12399 NumDefs++;
12400 }
12401 // Stop if more than one member is non-undef.
12402 if (NumDefs > 1)
12403 break;
12404
12405 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12406 VT.getVectorElementType(),
12407 X.getValueType().getVectorElementCount()));
12408 }
12409
12410 if (NumDefs == 0)
12411 return DAG.getUNDEF(VT);
12412
12413 if (NumDefs == 1) {
12414 assert(V.getNode() && "The single defined operand is empty!");
12415 SmallVector<SDValue, 8> Opnds;
12416 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12417 if (i != Idx) {
12418 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12419 continue;
12420 }
12421 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12422 AddToWorklist(NV.getNode());
12423 Opnds.push_back(NV);
12424 }
12425 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12426 }
12427 }
12428
12429 // Fold truncate of a bitcast of a vector to an extract of the low vector
12430 // element.
12431 //
12432 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12433 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12434 SDValue VecSrc = N0.getOperand(0);
12435 EVT VecSrcVT = VecSrc.getValueType();
12436 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12437 (!LegalOperations ||
12438 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12439 SDLoc SL(N);
12440
12441 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12442 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12443 DAG.getVectorIdxConstant(Idx, SL));
12444 }
12445 }
12446
12447 // Simplify the operands using demanded-bits information.
12448 if (SimplifyDemandedBits(SDValue(N, 0)))
12449 return SDValue(N, 0);
12450
12451 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12452 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12453 // When the adde's carry is not used.
12454 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12455 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12456 // We only do this for addcarry before operation legalization.
12457 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12458 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12459 SDLoc SL(N);
12460 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12461 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12462 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12463 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12464 }
12465
12466 // fold (truncate (extract_subvector(ext x))) ->
12467 // (extract_subvector x)
12468 // TODO: This can be generalized to cover cases where the truncate and extract
12469 // do not fully cancel each other out.
12470 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12471 SDValue N00 = N0.getOperand(0);
12472 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12473 N00.getOpcode() == ISD::ZERO_EXTEND ||
12474 N00.getOpcode() == ISD::ANY_EXTEND) {
12475 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12476 VT.getVectorElementType())
12477 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12478 N00.getOperand(0), N0.getOperand(1));
12479 }
12480 }
12481
12482 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12483 return NewVSel;
12484
12485 // Narrow a suitable binary operation with a non-opaque constant operand by
12486 // moving it ahead of the truncate. This is limited to pre-legalization
12487 // because targets may prefer a wider type during later combines and invert
12488 // this transform.
12489 switch (N0.getOpcode()) {
12490 case ISD::ADD:
12491 case ISD::SUB:
12492 case ISD::MUL:
12493 case ISD::AND:
12494 case ISD::OR:
12495 case ISD::XOR:
12496 if (!LegalOperations && N0.hasOneUse() &&
12497 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12498 isConstantOrConstantVector(N0.getOperand(1), true))) {
12499 // TODO: We already restricted this to pre-legalization, but for vectors
12500 // we are extra cautious to not create an unsupported operation.
12501 // Target-specific changes are likely needed to avoid regressions here.
12502 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12503 SDLoc DL(N);
12504 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12505 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12506 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12507 }
12508 }
12509 break;
12510 case ISD::USUBSAT:
12511 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12512 // enough to know that the upper bits are zero, we must ensure that we
12513 // don't introduce an extra truncate.
12514 if (!LegalOperations && N0.hasOneUse() &&
12515 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12516 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12517 VT.getScalarSizeInBits() &&
12518 hasOperation(N0.getOpcode(), VT)) {
12519 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12520 DAG, SDLoc(N));
12521 }
12522 break;
12523 }
12524
12525 return SDValue();
12526}
12527
12528static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12529 SDValue Elt = N->getOperand(i);
12530 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12531 return Elt.getNode();
12532 return Elt.getOperand(Elt.getResNo()).getNode();
12533}
12534
12535/// build_pair (load, load) -> load
12536/// if load locations are consecutive.
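/// For example (editorial illustration, hypothetical pointer %p): on a
/// little-endian target,
///   build_pair (load i32 %p), (load i32 %p+4)
/// can become a single (load i64 %p), provided the wider load is legal and
/// its ABI alignment is no stricter than the original load's alignment.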
12537SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12538 assert(N->getOpcode() == ISD::BUILD_PAIR);
12539
12540 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12541 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12542
12543 // A BUILD_PAIR always has the least significant part in elt 0 and the
12544 // most significant part in elt 1. So when combining into one large load, we
12545 // need to consider the endianness.
12546 if (DAG.getDataLayout().isBigEndian())
12547 std::swap(LD1, LD2);
12548
12549 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
12550 LD1->getAddressSpace() != LD2->getAddressSpace())
12551 return SDValue();
12552 EVT LD1VT = LD1->getValueType(0);
12553 unsigned LD1Bytes = LD1VT.getStoreSize();
12554 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12555 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12556 Align Alignment = LD1->getAlign();
12557 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12558 VT.getTypeForEVT(*DAG.getContext()));
12559
12560 if (NewAlign <= Alignment &&
12561 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12562 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12563 LD1->getPointerInfo(), Alignment);
12564 }
12565
12566 return SDValue();
12567}
12568
12569static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12570 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12571 // and Lo parts; on big-endian machines it doesn't.
12572 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12573}
12574
12575static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12576 const TargetLowering &TLI) {
12577 // If this is not a bitcast to an FP type or if the target doesn't have
12578 // IEEE754-compliant FP logic, we're done.
12579 EVT VT = N->getValueType(0);
12580 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12581 return SDValue();
12582
12583 // TODO: Handle cases where the integer constant is a different scalar
12584 // bitwidth to the FP.
12585 SDValue N0 = N->getOperand(0);
12586 EVT SourceVT = N0.getValueType();
12587 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12588 return SDValue();
12589
12590 unsigned FPOpcode;
12591 APInt SignMask;
12592 switch (N0.getOpcode()) {
12593 case ISD::AND:
12594 FPOpcode = ISD::FABS;
12595 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12596 break;
12597 case ISD::XOR:
12598 FPOpcode = ISD::FNEG;
12599 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12600 break;
12601 case ISD::OR:
12602 FPOpcode = ISD::FABS;
12603 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12604 break;
12605 default:
12606 return SDValue();
12607 }
12608
12609 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12610 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12611 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12612 // fneg (fabs X)
12613 SDValue LogicOp0 = N0.getOperand(0);
12614 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12615 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12616 LogicOp0.getOpcode() == ISD::BITCAST &&
12617 LogicOp0.getOperand(0).getValueType() == VT) {
12618 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12619 NumFPLogicOpsConv++;
12620 if (N0.getOpcode() == ISD::OR)
12621 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12622 return FPOp;
12623 }
12624
12625 return SDValue();
12626}
12627
12628SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12629 SDValue N0 = N->getOperand(0);
12630 EVT VT = N->getValueType(0);
12631
12632 if (N0.isUndef())
12633 return DAG.getUNDEF(VT);
12634
12635 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12636 // Only do this before legalize types, unless both types are integer and the
12637 // scalar type is legal. Only do this before legalize ops, since the target
12638 // may be depending on the bitcast.
12639 // First check to see if this is all constant.
12640 // TODO: Support FP bitcasts after legalize types.
12641 if (VT.isVector() &&
12642 (!LegalTypes ||
12643 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12644 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12645 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12646 cast<BuildVectorSDNode>(N0)->isConstant())
12647 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12648 VT.getVectorElementType());
12649
12650 // If the input is a constant, let getNode fold it.
12651 if (isIntOrFPConstant(N0)) {
12652 // If we can't allow illegal operations, we need to check that this is just
12653 // an fp -> int or int -> fp conversion and that the resulting operation will
12654 // be legal.
12655 if (!LegalOperations ||
12656 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12657 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12658 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12659 TLI.isOperationLegal(ISD::Constant, VT))) {
12660 SDValue C = DAG.getBitcast(VT, N0);
12661 if (C.getNode() != N)
12662 return C;
12663 }
12664 }
12665
12666 // (conv (conv x, t1), t2) -> (conv x, t2)
12667 if (N0.getOpcode() == ISD::BITCAST)
12668 return DAG.getBitcast(VT, N0.getOperand(0));
12669
12670 // fold (conv (load x)) -> (load (conv*)x)
12671 // If the resultant load doesn't need a higher alignment than the original!
12672 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12673 // Do not remove the cast if the types differ in endian layout.
12674 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12675 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12676 // If the load is volatile, we only want to change the load type if the
12677 // resulting load is legal. Otherwise we might increase the number of
12678 // memory accesses. We don't care if the original type was legal or not
12679 // as we assume software couldn't rely on the number of accesses of an
12680 // illegal type.
12681 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12682 TLI.isOperationLegal(ISD::LOAD, VT))) {
12683 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12684
12685 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12686 *LN0->getMemOperand())) {
12687 SDValue Load =
12688 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12689 LN0->getPointerInfo(), LN0->getAlign(),
12690 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12691 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12692 return Load;
12693 }
12694 }
12695
12696 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12697 return V;
12698
12699 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12700 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12701 //
12702 // For ppc_fp128:
12703 // fold (bitcast (fneg x)) ->
12704 // flipbit = signbit
12705 // (xor (bitcast x) (build_pair flipbit, flipbit))
12706 //
12707 // fold (bitcast (fabs x)) ->
12708 // flipbit = (and (extract_element (bitcast x), 0), signbit)
12709 // (xor (bitcast x) (build_pair flipbit, flipbit))
12710 // This often reduces constant pool loads.
12711 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12712 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12713 N0.getNode()->hasOneUse() && VT.isInteger() &&
12714 !VT.isVector() && !N0.getValueType().isVector()) {
12715 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12716 AddToWorklist(NewConv.getNode());
12717
12718 SDLoc DL(N);
12719 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12720 assert(VT.getSizeInBits() == 128);
12721 SDValue SignBit = DAG.getConstant(
12722 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12723 SDValue FlipBit;
12724 if (N0.getOpcode() == ISD::FNEG) {
12725 FlipBit = SignBit;
12726 AddToWorklist(FlipBit.getNode());
12727 } else {
12728 assert(N0.getOpcode() == ISD::FABS);
12729 SDValue Hi =
12730 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12731 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12732 SDLoc(NewConv)));
12733 AddToWorklist(Hi.getNode());
12734 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12735 AddToWorklist(FlipBit.getNode());
12736 }
12737 SDValue FlipBits =
12738 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12739 AddToWorklist(FlipBits.getNode());
12740 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12741 }
12742 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12743 if (N0.getOpcode() == ISD::FNEG)
12744 return DAG.getNode(ISD::XOR, DL, VT,
12745 NewConv, DAG.getConstant(SignBit, DL, VT));
12746 assert(N0.getOpcode() == ISD::FABS);
12747 return DAG.getNode(ISD::AND, DL, VT,
12748 NewConv, DAG.getConstant(~SignBit, DL, VT));
12749 }
12750
12751 // fold (bitconvert (fcopysign cst, x)) ->
12752 // (or (and (bitconvert x), sign), (and cst, (not sign)))
12753 // Note that we don't handle (copysign x, cst) because this can always be
12754 // folded to an fneg or fabs.
12755 //
12756 // For ppc_fp128:
12757 // fold (bitcast (fcopysign cst, x)) ->
12758 // flipbit = (and (extract_element
12759 // (xor (bitcast cst), (bitcast x)), 0),
12760 // signbit)
12761 // (xor (bitcast cst) (build_pair flipbit, flipbit))
12762 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12763 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12764 VT.isInteger() && !VT.isVector()) {
12765 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12766 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12767 if (isTypeLegal(IntXVT)) {
12768 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12769 AddToWorklist(X.getNode());
12770
12771 // If X has a different width than the result/lhs, sext it or truncate it.
12772 unsigned VTWidth = VT.getSizeInBits();
12773 if (OrigXWidth < VTWidth) {
12774 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12775 AddToWorklist(X.getNode());
12776 } else if (OrigXWidth > VTWidth) {
12777 // To get the sign bit in the right place, we have to shift it right
12778 // before truncating.
12779 SDLoc DL(X);
12780 X = DAG.getNode(ISD::SRL, DL,
12781 X.getValueType(), X,
12782 DAG.getConstant(OrigXWidth-VTWidth, DL,
12783 X.getValueType()));
12784 AddToWorklist(X.getNode());
12785 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12786 AddToWorklist(X.getNode());
12787 }
12788
12789 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12790 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12791 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12792 AddToWorklist(Cst.getNode());
12793 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12794 AddToWorklist(X.getNode());
12795 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12796 AddToWorklist(XorResult.getNode());
12797 SDValue XorResult64 = DAG.getNode(
12798 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12799 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12800 SDLoc(XorResult)));
12801 AddToWorklist(XorResult64.getNode());
12802 SDValue FlipBit =
12803 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12804 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12805 AddToWorklist(FlipBit.getNode());
12806 SDValue FlipBits =
12807 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12808 AddToWorklist(FlipBits.getNode());
12809 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12810 }
12811 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12812 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12813 X, DAG.getConstant(SignBit, SDLoc(X), VT));
12814 AddToWorklist(X.getNode());
12815
12816 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12817 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12818 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12819 AddToWorklist(Cst.getNode());
12820
12821 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12822 }
12823 }
12824
12825 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12826 if (N0.getOpcode() == ISD::BUILD_PAIR)
12827 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12828 return CombineLD;
12829
12830 // Remove double bitcasts from shuffles - this is often a legacy of
12831 // XformToShuffleWithZero being used to combine bitmaskings (of
12832 // float vectors bitcast to integer vectors) into shuffles.
12833 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12834 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12835 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12836 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12837 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12838 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12839
12840 // If an operand is a bitcast, peek through it if it casts from the original VT.
12841 // If an operand is a constant, just bitcast it back to the original VT.
12842 auto PeekThroughBitcast = [&](SDValue Op) {
12843 if (Op.getOpcode() == ISD::BITCAST &&
12844 Op.getOperand(0).getValueType() == VT)
12845 return SDValue(Op.getOperand(0));
12846 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12847 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12848 return DAG.getBitcast(VT, Op);
12849 return SDValue();
12850 };
12851
12852 // FIXME: If either input vector is bitcast, try to convert the shuffle to
12853 // the result type of this bitcast. This would eliminate at least one
12854 // bitcast. See the transform in InstCombine.
12855 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12856 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12857 if (!(SV0 && SV1))
12858 return SDValue();
12859
12860 int MaskScale =
12861 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12862 SmallVector<int, 8> NewMask;
12863 for (int M : SVN->getMask())
12864 for (int i = 0; i != MaskScale; ++i)
12865 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12866
12867 SDValue LegalShuffle =
12868 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12869 if (LegalShuffle)
12870 return LegalShuffle;
12871 }
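// For illustration of the shuffle fold above: bitcast(shuffle(bitcast(s0),
// bitcast(s1))) is rebuilt as a shuffle directly on s0/s1 in the bitcast
// result type. Because the element count changes by MaskScale, every original
// mask entry M expands to the run M*MaskScale .. M*MaskScale + MaskScale - 1,
// and undef entries (M < 0) stay -1.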
12872
12873 return SDValue();
12874}
12875
12876SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12877 EVT VT = N->getValueType(0);
12878 return CombineConsecutiveLoads(N, VT);
12879}
12880
12881SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12882 SDValue N0 = N->getOperand(0);
12883
12884 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
12885 return N0;
12886
12887 return SDValue();
12888}
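// Note on the freeze combine above (illustrative): ISD::FREEZE exists to pin
// an undef/poison operand to one fixed value. When the operand is already
// guaranteed to be neither undef nor poison, the freeze is a no-op, so the
// combiner simply forwards the operand.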
12889
12890/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12891/// operands. DstEltVT indicates the destination element value type.
12892SDValue DAGCombiner::
12893ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12894 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12895
12896 // If this is already the right type, we're done.
12897 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12898
12899 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12900 unsigned DstBitSize = DstEltVT.getSizeInBits();
12901
12902 // If this is a conversion of N elements of one type to N elements of another
12903 // type, convert each element. This handles FP<->INT cases.
12904 if (SrcBitSize == DstBitSize) {
12905 SmallVector<SDValue, 8> Ops;
12906 for (SDValue Op : BV->op_values()) {
12907 // If the vector element type is not legal, the BUILD_VECTOR operands
12908 // are promoted and implicitly truncated. Make that explicit here.
12909 if (Op.getValueType() != SrcEltVT)
12910 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12911 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12912 AddToWorklist(Ops.back().getNode());
12913 }
12914 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12915 BV->getValueType(0).getVectorNumElements());
12916 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12917 }
12918
12919 // Otherwise, we're growing or shrinking the elements. To avoid having to
12920 // handle annoying details of growing/shrinking FP values, we convert them to
12921 // int first.
12922 if (SrcEltVT.isFloatingPoint()) {
12923 // Convert the input float vector to an int vector where the elements are the
12924 // same size.
12925 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12926 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12927 SrcEltVT = IntVT;
12928 }
12929
12930 // Now we know the input is an integer vector. If the output is a FP type,
12931 // convert to integer first, then to FP of the right size.
12932 if (DstEltVT.isFloatingPoint()) {
12933 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12934 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12935
12936 // Next, convert to FP elements of the same size.
12937 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12938 }
12939
12940 SDLoc DL(BV);
12941
12942 // Okay, we know the src/dst types are both integers of differing types.
12943 // Handling growing first.
12944 assert(SrcEltVT.isInteger() && DstEltVT.isInteger())((void)0);
12945 if (SrcBitSize < DstBitSize) {
12946 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12947
12948 SmallVector<SDValue, 8> Ops;
12949 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12950 i += NumInputsPerOutput) {
12951 bool isLE = DAG.getDataLayout().isLittleEndian();
12952 APInt NewBits = APInt(DstBitSize, 0);
12953 bool EltIsUndef = true;
12954 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12955 // Shift the previously computed bits over.
12956 NewBits <<= SrcBitSize;
12957 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12958 if (Op.isUndef()) continue;
12959 EltIsUndef = false;
12960
12961 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12962 zextOrTrunc(SrcBitSize).zext(DstBitSize);
12963 }
12964
12965 if (EltIsUndef)
12966 Ops.push_back(DAG.getUNDEF(DstEltVT));
12967 else
12968 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12969 }
12970
12971 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12972 return DAG.getBuildVector(VT, DL, Ops);
12973 }
12974
12975 // Finally, this must be the case where we are shrinking elements: each input
12976 // turns into multiple outputs.
12977 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12978 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12979 NumOutputsPerInput*BV->getNumOperands());
12980 SmallVector<SDValue, 8> Ops;
12981
12982 for (const SDValue &Op : BV->op_values()) {
12983 if (Op.isUndef()) {
12984 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12985 continue;
12986 }
12987
12988 APInt OpVal = cast<ConstantSDNode>(Op)->
12989 getAPIntValue().zextOrTrunc(SrcBitSize);
12990
12991 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12992 APInt ThisVal = OpVal.trunc(DstBitSize);
12993 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12994 OpVal.lshrInPlace(DstBitSize);
12995 }
12996
12997 // For big endian targets, swap the order of the pieces of each element.
12998 if (DAG.getDataLayout().isBigEndian())
12999 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
13000 }
13001
13002 return DAG.getBuildVector(VT, DL, Ops);
13003}
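// Worked example for the constant-folded bitcast above (illustrative):
// bitcasting a build_vector of four i8 constants <0x01, 0x02, 0x03, 0x04> to
// v2i16 packs pairs of source elements into each destination element. On a
// little-endian target this yields <0x0201, 0x0403> (earlier elements land in
// the low bits); on a big-endian target it yields <0x0102, 0x0304>. Shrinking
// (e.g. v2i16 back to v4i8) is the inverse: each input constant is split into
// pieces, with the piece order reversed on big-endian targets.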
13004
13005/// Try to perform FMA combining on a given FADD node.
13006SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13007 SDValue N0 = N->getOperand(0);
13008 SDValue N1 = N->getOperand(1);
13009 EVT VT = N->getValueType(0);
13010 SDLoc SL(N);
13011
13012 const TargetOptions &Options = DAG.getTarget().Options;
13013
13014 // Floating-point multiply-add with intermediate rounding.
13015 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13016
13017 // Floating-point multiply-add without intermediate rounding.
13018 bool HasFMA =
13019 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13020 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13021
13022 // No valid opcode, do not combine.
13023 if (!HasFMAD && !HasFMA)
13024 return SDValue();
13025
13026 bool CanReassociate =
13027 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13028 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13029 Options.UnsafeFPMath || HasFMAD);
13030 // If the addition is not contractable, do not combine.
13031 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13032 return SDValue();
13033
13034 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13035 return SDValue();
13036
13037 // Always prefer FMAD to FMA for precision.
13038 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13039 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13040
13041 // Is the node an FMUL and contractable either due to global flags or
13042 // SDNodeFlags.
13043 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13044 if (N.getOpcode() != ISD::FMUL)
13045 return false;
13046 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13047 };
13048 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13049 // prefer to fold the multiply with fewer uses.
13050 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13051 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13052 std::swap(N0, N1);
13053 }
13054
13055 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13056 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13057 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13058 N0.getOperand(1), N1);
13059 }
13060
13061 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13062 // Note: Commutes FADD operands.
13063 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13064 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13065 N1.getOperand(1), N0);
13066 }
13067
13068 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13069 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13070 // This requires reassociation because it changes the order of operations.
13071 SDValue FMA, E;
13072 if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
13073 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13074 N0.getOperand(2).hasOneUse()) {
13075 FMA = N0;
13076 E = N1;
13077 } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
13078 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13079 N1.getOperand(2).hasOneUse()) {
13080 FMA = N1;
13081 E = N0;
13082 }
13083 if (FMA && E) {
13084 SDValue A = FMA.getOperand(0);
13085 SDValue B = FMA.getOperand(1);
13086 SDValue C = FMA.getOperand(2).getOperand(0);
13087 SDValue D = FMA.getOperand(2).getOperand(1);
13088 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13089 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13090 }
13091
13092 // Look through FP_EXTEND nodes to do more combining.
13093
13094 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13095 if (N0.getOpcode() == ISD::FP_EXTEND) {
13096 SDValue N00 = N0.getOperand(0);
13097 if (isContractableFMUL(N00) &&
13098 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13099 N00.getValueType())) {
13100 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13101 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13102 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13103 N1);
13104 }
13105 }
13106
13107 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13108 // Note: Commutes FADD operands.
13109 if (N1.getOpcode() == ISD::FP_EXTEND) {
13110 SDValue N10 = N1.getOperand(0);
13111 if (isContractableFMUL(N10) &&
13112 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13113 N10.getValueType())) {
13114 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13115 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13116 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13117 N0);
13118 }
13119 }
13120
13121 // More folding opportunities when target permits.
13122 if (Aggressive) {
13123 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13124 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13125 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13126 SDValue Z) {
13127 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13128 DAG.getNode(PreferredFusedOpcode, SL, VT,
13129 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13130 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13131 Z));
13132 };
13133 if (N0.getOpcode() == PreferredFusedOpcode) {
13134 SDValue N02 = N0.getOperand(2);
13135 if (N02.getOpcode() == ISD::FP_EXTEND) {
13136 SDValue N020 = N02.getOperand(0);
13137 if (isContractableFMUL(N020) &&
13138 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13139 N020.getValueType())) {
13140 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13141 N020.getOperand(0), N020.getOperand(1),
13142 N1);
13143 }
13144 }
13145 }
13146
13147 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13148 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13149 // FIXME: This turns two single-precision and one double-precision
13150 // operation into two double-precision operations, which might not be
13151 // interesting for all targets, especially GPUs.
13152 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13153 SDValue Z) {
13154 return DAG.getNode(
13155 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13156 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13157 DAG.getNode(PreferredFusedOpcode, SL, VT,
13158 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13159 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13160 };
13161 if (N0.getOpcode() == ISD::FP_EXTEND) {
13162 SDValue N00 = N0.getOperand(0);
13163 if (N00.getOpcode() == PreferredFusedOpcode) {
13164 SDValue N002 = N00.getOperand(2);
13165 if (isContractableFMUL(N002) &&
13166 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13167 N00.getValueType())) {
13168 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13169 N002.getOperand(0), N002.getOperand(1),
13170 N1);
13171 }
13172 }
13173 }
13174
13175 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13176 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13177 if (N1.getOpcode() == PreferredFusedOpcode) {
13178 SDValue N12 = N1.getOperand(2);
13179 if (N12.getOpcode() == ISD::FP_EXTEND) {
13180 SDValue N120 = N12.getOperand(0);
13181 if (isContractableFMUL(N120) &&
13182 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13183 N120.getValueType())) {
13184 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13185 N120.getOperand(0), N120.getOperand(1),
13186 N0);
13187 }
13188 }
13189 }
13190
13191 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13192 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13193 // FIXME: This turns two single-precision and one double-precision
13194 // operation into two double-precision operations, which might not be
13195 // interesting for all targets, especially GPUs.
13196 if (N1.getOpcode() == ISD::FP_EXTEND) {
13197 SDValue N10 = N1.getOperand(0);
13198 if (N10.getOpcode() == PreferredFusedOpcode) {
13199 SDValue N102 = N10.getOperand(2);
13200 if (isContractableFMUL(N102) &&
13201 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13202 N10.getValueType())) {
13203 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13204 N102.getOperand(0), N102.getOperand(1),
13205 N0);
13206 }
13207 }
13208 }
13209 }
13210
13211 return SDValue();
13212}
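// For illustration of the basic contraction above: the pair
//   t = fmul x, y
//   r = fadd t, z
// becomes the single node r = fma x, y, z when fusion is allowed (fast
// FPOpFusion / unsafe-fp-math globally, or a 'contract' fast-math flag on the
// nodes) and the target reports FMA as profitable, removing the intermediate
// rounding of the multiply. The FP_EXTEND variants do the same across a
// widening cast, e.g. fadd (fpext (fmul x, y)), z -> fma (fpext x), (fpext y), z.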
13213
13214/// Try to perform FMA combining on a given FSUB node.
13215SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13216 SDValue N0 = N->getOperand(0);
13217 SDValue N1 = N->getOperand(1);
13218 EVT VT = N->getValueType(0);
13219 SDLoc SL(N);
13220
13221 const TargetOptions &Options = DAG.getTarget().Options;
13222 // Floating-point multiply-add with intermediate rounding.
13223 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13224
13225 // Floating-point multiply-add without intermediate rounding.
13226 bool HasFMA =
13227 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13228 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13229
13230 // No valid opcode, do not combine.
13231 if (!HasFMAD && !HasFMA)
13232 return SDValue();
13233
13234 const SDNodeFlags Flags = N->getFlags();
13235 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13236 Options.UnsafeFPMath || HasFMAD);
13237
13238 // If the subtraction is not contractable, do not combine.
13239 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13240 return SDValue();
13241
13242 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13243 return SDValue();
13244
13245 // Always prefer FMAD to FMA for precision.
13246 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13247 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13248 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13249
13250 // Is the node an FMUL and contractable either due to global flags or
13251 // SDNodeFlags.
13252 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13253 if (N.getOpcode() != ISD::FMUL)
13254 return false;
13255 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13256 };
13257
13258 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13259 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13260 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13261 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13262 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13263 }
13264 return SDValue();
13265 };
13266
13267 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13268 // Note: Commutes FSUB operands.
13269 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13270 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13271 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13272 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13273 YZ.getOperand(1), X);
13274 }
13275 return SDValue();
13276 };
13277
13278 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13279 // prefer to fold the multiply with fewer uses.
13280 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13281 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13282 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13283 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13284 return V;
13285 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13286 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13287 return V;
13288 } else {
13289 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13290 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13291 return V;
13292 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13293 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13294 return V;
13295 }
13296
13297 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13298 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13299 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13300 SDValue N00 = N0.getOperand(0).getOperand(0);
13301 SDValue N01 = N0.getOperand(0).getOperand(1);
13302 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13303 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13304 DAG.getNode(ISD::FNEG, SL, VT, N1));
13305 }
13306
13307 // Look through FP_EXTEND nodes to do more combining.
13308
13309 // fold (fsub (fpext (fmul x, y)), z)
13310 // -> (fma (fpext x), (fpext y), (fneg z))
13311 if (N0.getOpcode() == ISD::FP_EXTEND) {
13312 SDValue N00 = N0.getOperand(0);
13313 if (isContractableFMUL(N00) &&
13314 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13315 N00.getValueType())) {
13316 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13317 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13318 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13319 DAG.getNode(ISD::FNEG, SL, VT, N1));
13320 }
13321 }
13322
13323 // fold (fsub x, (fpext (fmul y, z)))
13324 // -> (fma (fneg (fpext y)), (fpext z), x)
13325 // Note: Commutes FSUB operands.
13326 if (N1.getOpcode() == ISD::FP_EXTEND) {
13327 SDValue N10 = N1.getOperand(0);
13328 if (isContractableFMUL(N10) &&
13329 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13330 N10.getValueType())) {
13331 return DAG.getNode(
13332 PreferredFusedOpcode, SL, VT,
13333 DAG.getNode(ISD::FNEG, SL, VT,
13334 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13335 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13336 }
13337 }
13338
13339 // fold (fsub (fpext (fneg (fmul x, y))), z)
13340 // -> (fneg (fma (fpext x), (fpext y), z))
13341 // Note: This could be removed with appropriate canonicalization of the
13342 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13343 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13344 // from implementing the canonicalization in visitFSUB.
13345 if (N0.getOpcode() == ISD::FP_EXTEND) {
13346 SDValue N00 = N0.getOperand(0);
13347 if (N00.getOpcode() == ISD::FNEG) {
13348 SDValue N000 = N00.getOperand(0);
13349 if (isContractableFMUL(N000) &&
13350 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13351 N00.getValueType())) {
13352 return DAG.getNode(
13353 ISD::FNEG, SL, VT,
13354 DAG.getNode(PreferredFusedOpcode, SL, VT,
13355 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13356 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13357 N1));
13358 }
13359 }
13360 }
13361
13362 // fold (fsub (fneg (fpext (fmul x, y))), z)
13363 // -> (fneg (fma (fpext x), (fpext y), z))
13364 // Note: This could be removed with appropriate canonicalization of the
13365 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13366 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13367 // from implementing the canonicalization in visitFSUB.
13368 if (N0.getOpcode() == ISD::FNEG) {
13369 SDValue N00 = N0.getOperand(0);
13370 if (N00.getOpcode() == ISD::FP_EXTEND) {
13371 SDValue N000 = N00.getOperand(0);
13372 if (isContractableFMUL(N000) &&
13373 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13374 N000.getValueType())) {
13375 return DAG.getNode(
13376 ISD::FNEG, SL, VT,
13377 DAG.getNode(PreferredFusedOpcode, SL, VT,
13378 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13379 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13380 N1));
13381 }
13382 }
13383 }
13384
13385 auto isReassociable = [Options](SDNode *N) {
13386 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13387 };
13388
13389 auto isContractableAndReassociableFMUL = [isContractableFMUL,
13390 isReassociable](SDValue N) {
13391 return isContractableFMUL(N) && isReassociable(N.getNode());
13392 };
13393
13394 // More folding opportunities when target permits.
13395 if (Aggressive && isReassociable(N)) {
13396 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13397 // fold (fsub (fma x, y, (fmul u, v)), z)
13398 // -> (fma x, y, (fma u, v, (fneg z)))
13399 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13400 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13401 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13402 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13403 N0.getOperand(1),
13404 DAG.getNode(PreferredFusedOpcode, SL, VT,
13405 N0.getOperand(2).getOperand(0),
13406 N0.getOperand(2).getOperand(1),
13407 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13408 }
13409
13410 // fold (fsub x, (fma y, z, (fmul u, v)))
13411 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13412 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13413 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13414 N1->hasOneUse() && NoSignedZero) {
13415 SDValue N20 = N1.getOperand(2).getOperand(0);
13416 SDValue N21 = N1.getOperand(2).getOperand(1);
13417 return DAG.getNode(
13418 PreferredFusedOpcode, SL, VT,
13419 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13420 DAG.getNode(PreferredFusedOpcode, SL, VT,
13421 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13422 }
13423
13424 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13425 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13426 if (N0.getOpcode() == PreferredFusedOpcode &&
13427 N0->hasOneUse()) {
13428 SDValue N02 = N0.getOperand(2);
13429 if (N02.getOpcode() == ISD::FP_EXTEND) {
13430 SDValue N020 = N02.getOperand(0);
13431 if (isContractableAndReassociableFMUL(N020) &&
13432 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13433 N020.getValueType())) {
13434 return DAG.getNode(
13435 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13436 DAG.getNode(
13437 PreferredFusedOpcode, SL, VT,
13438 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13439 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13440 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13441 }
13442 }
13443 }
13444
13445 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13446 // -> (fma (fpext x), (fpext y),
13447 // (fma (fpext u), (fpext v), (fneg z)))
13448 // FIXME: This turns two single-precision and one double-precision
13449 // operation into two double-precision operations, which might not be
13450 // interesting for all targets, especially GPUs.
13451 if (N0.getOpcode() == ISD::FP_EXTEND) {
13452 SDValue N00 = N0.getOperand(0);
13453 if (N00.getOpcode() == PreferredFusedOpcode) {
13454 SDValue N002 = N00.getOperand(2);
13455 if (isContractableAndReassociableFMUL(N002) &&
13456 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13457 N00.getValueType())) {
13458 return DAG.getNode(
13459 PreferredFusedOpcode, SL, VT,
13460 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13461 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13462 DAG.getNode(
13463 PreferredFusedOpcode, SL, VT,
13464 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13465 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13466 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13467 }
13468 }
13469 }
13470
13471 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13472 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13473 if (N1.getOpcode() == PreferredFusedOpcode &&
13474 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13475 N1->hasOneUse()) {
13476 SDValue N120 = N1.getOperand(2).getOperand(0);
13477 if (isContractableAndReassociableFMUL(N120) &&
13478 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13479 N120.getValueType())) {
13480 SDValue N1200 = N120.getOperand(0);
13481 SDValue N1201 = N120.getOperand(1);
13482 return DAG.getNode(
13483 PreferredFusedOpcode, SL, VT,
13484 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13485 DAG.getNode(PreferredFusedOpcode, SL, VT,
13486 DAG.getNode(ISD::FNEG, SL, VT,
13487 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13488 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13489 }
13490 }
13491
13492 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13493 // -> (fma (fneg (fpext y)), (fpext z),
13494 // (fma (fneg (fpext u)), (fpext v), x))
13495 // FIXME: This turns two single-precision and one double-precision
13496 // operation into two double-precision operations, which might not be
13497 // interesting for all targets, especially GPUs.
13498 if (N1.getOpcode() == ISD::FP_EXTEND &&
13499 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13500 SDValue CvtSrc = N1.getOperand(0);
13501 SDValue N100 = CvtSrc.getOperand(0);
13502 SDValue N101 = CvtSrc.getOperand(1);
13503 SDValue N102 = CvtSrc.getOperand(2);
13504 if (isContractableAndReassociableFMUL(N102) &&
13505 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13506 CvtSrc.getValueType())) {
13507 SDValue N1020 = N102.getOperand(0);
13508 SDValue N1021 = N102.getOperand(1);
13509 return DAG.getNode(
13510 PreferredFusedOpcode, SL, VT,
13511 DAG.getNode(ISD::FNEG, SL, VT,
13512 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13513 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13514 DAG.getNode(PreferredFusedOpcode, SL, VT,
13515 DAG.getNode(ISD::FNEG, SL, VT,
13516 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13517 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13518 }
13519 }
13520 }
13521
13522 return SDValue();
13523}
13524
13525/// Try to perform FMA combining on a given FMUL node based on the distributive
13526/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13527/// subtraction instead of addition).
13528SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13529 SDValue N0 = N->getOperand(0);
13530 SDValue N1 = N->getOperand(1);
13531 EVT VT = N->getValueType(0);
13532 SDLoc SL(N);
13533
13534 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation")((void)0);
13535
13536 const TargetOptions &Options = DAG.getTarget().Options;
13537
13538 // The transforms below are incorrect when x == 0 and y == inf, because the
13539 // intermediate multiplication produces a nan.
13540 if (!Options.NoInfsFPMath)
13541 return SDValue();
13542
13543 // Floating-point multiply-add without intermediate rounding.
13544 bool HasFMA =
13545 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13546 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13547 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13548
13549 // Floating-point multiply-add with intermediate rounding. This can result
13550 // in a less precise result due to the changed rounding order.
13551 bool HasFMAD = Options.UnsafeFPMath &&
13552 (LegalOperations && TLI.isFMADLegal(DAG, N));
13553
13554 // No valid opcode, do not combine.
13555 if (!HasFMAD && !HasFMA)
13556 return SDValue();
13557
13558 // Always prefer FMAD to FMA for precision.
13559 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13560 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13561
13562 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13563 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13564 auto FuseFADD = [&](SDValue X, SDValue Y) {
13565 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13566 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13567 if (C->isExactlyValue(+1.0))
13568 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13569 Y);
13570 if (C->isExactlyValue(-1.0))
13571 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13572 DAG.getNode(ISD::FNEG, SL, VT, Y));
13573 }
13574 }
13575 return SDValue();
13576 };
13577
13578 if (SDValue FMA = FuseFADD(N0, N1))
13579 return FMA;
13580 if (SDValue FMA = FuseFADD(N1, N0))
13581 return FMA;
13582
13583 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13584 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13585 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13586 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13587 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13588 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13589 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13590 if (C0->isExactlyValue(+1.0))
13591 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13592 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13593 Y);
13594 if (C0->isExactlyValue(-1.0))
13595 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13596 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13597 DAG.getNode(ISD::FNEG, SL, VT, Y));
13598 }
13599 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13600 if (C1->isExactlyValue(+1.0))
13601 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13602 DAG.getNode(ISD::FNEG, SL, VT, Y));
13603 if (C1->isExactlyValue(-1.0))
13604 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13605 Y);
13606 }
13607 }
13608 return SDValue();
13609 };
13610
13611 if (SDValue FMA = FuseFSUB(N0, N1))
13612 return FMA;
13613 if (SDValue FMA = FuseFSUB(N1, N0))
13614 return FMA;
13615
13616 return SDValue();
13617}
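// Numeric sanity check for the distributive folds above (illustrative): with
// x0 = 3.0 and y = 2.0, the source expression (x0 + 1.0) * y evaluates to 8.0
// and the fused form fma(x0, y, y) = 3.0 * 2.0 + 2.0 is also 8.0; the FSUB
// variants follow the same identity with a negated operand.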
13618
13619SDValue DAGCombiner::visitFADD(SDNode *N) {
13620 SDValue N0 = N->getOperand(0);
13621 SDValue N1 = N->getOperand(1);
13622 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13623 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13624 EVT VT = N->getValueType(0);
13625 SDLoc DL(N);
13626 const TargetOptions &Options = DAG.getTarget().Options;
13627 SDNodeFlags Flags = N->getFlags();
13628 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13629
13630 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13631 return R;
13632
13633 // fold vector ops
13634 if (VT.isVector())
13635 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13636 return FoldedVOp;
13637
13638 // fold (fadd c1, c2) -> c1 + c2
13639 if (N0CFP && N1CFP)
13640 return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13641
13642 // canonicalize constant to RHS
13643 if (N0CFP && !N1CFP)
13644 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13645
13646 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13647 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13648 if (N1C && N1C->isZero())
13649 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13650 return N0;
13651
13652 if (SDValue NewSel = foldBinOpIntoSelect(N))
13653 return NewSel;
13654
13655 // fold (fadd A, (fneg B)) -> (fsub A, B)
13656 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13657 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13658 N1, DAG, LegalOperations, ForCodeSize))
13659 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13660
13661 // fold (fadd (fneg A), B) -> (fsub B, A)
13662 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13663 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13664 N0, DAG, LegalOperations, ForCodeSize))
13665 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13666
13667 auto isFMulNegTwo = [](SDValue FMul) {
13668 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13669 return false;
13670 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13671 return C && C->isExactlyValue(-2.0);
13672 };
13673
13674 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13675 if (isFMulNegTwo(N0)) {
13676 SDValue B = N0.getOperand(0);
13677 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13678 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13679 }
13680 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13681 if (isFMulNegTwo(N1)) {
13682 SDValue B = N1.getOperand(0);
13683 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13684 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13685 }
13686
13687 // No FP constant should be created after legalization, as the Instruction
13688 // Selection pass has a hard time dealing with FP constants.
13689 bool AllowNewConst = (Level < AfterLegalizeDAG);
13690
13691 // If nnan is enabled, fold lots of things.
13692 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13693 // If allowed, fold (fadd (fneg x), x) -> 0.0
13694 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13695 return DAG.getConstantFP(0.0, DL, VT);
13696
13697 // If allowed, fold (fadd x, (fneg x)) -> 0.0
13698 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13699 return DAG.getConstantFP(0.0, DL, VT);
13700 }
13701
13702 // If 'unsafe math' (plus global nsz) or the reassoc and nsz flags allow it, fold lots of things.
13703 // TODO: break out portions of the transformations below for which Unsafe is
13704 // considered and which do not require both nsz and reassoc
13705 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13706 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13707 AllowNewConst) {
13708 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13709 if (N1CFP && N0.getOpcode() == ISD::FADD &&
13710 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13711 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13712 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13713 }
13714
13715 // We can fold chains of FADD's of the same value into multiplications.
13716 // This transform is not safe in general because we are reducing the number
13717 // of rounding steps.
13718 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13719 if (N0.getOpcode() == ISD::FMUL) {
13720 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13721 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13722
13723 // (fadd (fmul x, c), x) -> (fmul x, c+1)
13724 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13725 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13726 DAG.getConstantFP(1.0, DL, VT));
13727 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13728 }
13729
13730 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13731 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13732 N1.getOperand(0) == N1.getOperand(1) &&
13733 N0.getOperand(0) == N1.getOperand(0)) {
13734 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13735 DAG.getConstantFP(2.0, DL, VT));
13736 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13737 }
13738 }
13739
13740 if (N1.getOpcode() == ISD::FMUL) {
13741 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13742 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13743
13744 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13745 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13746 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13747 DAG.getConstantFP(1.0, DL, VT));
13748 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13749 }
13750
13751 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13752 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13753 N0.getOperand(0) == N0.getOperand(1) &&
13754 N1.getOperand(0) == N0.getOperand(0)) {
13755 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13756 DAG.getConstantFP(2.0, DL, VT));
13757 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13758 }
13759 }
13760
13761 if (N0.getOpcode() == ISD::FADD) {
13762 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13763 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13764 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13765 (N0.getOperand(0) == N1)) {
13766 return DAG.getNode(ISD::FMUL, DL, VT, N1,
13767 DAG.getConstantFP(3.0, DL, VT));
13768 }
13769 }
13770
13771 if (N1.getOpcode() == ISD::FADD) {
13772 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13773 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13774 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13775 N1.getOperand(0) == N0) {
13776 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13777 DAG.getConstantFP(3.0, DL, VT));
13778 }
13779 }
13780
13781 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13782 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13783 N0.getOperand(0) == N0.getOperand(1) &&
13784 N1.getOperand(0) == N1.getOperand(1) &&
13785 N0.getOperand(0) == N1.getOperand(0)) {
13786 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13787 DAG.getConstantFP(4.0, DL, VT));
13788 }
13789 }
13790 } // enable-unsafe-fp-math
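// For illustration of the reassociation folds above: (fadd (fmul x, c), x)
// becomes (fmul x, c+1); e.g. with x = 2.0 and c = 5.0, 2*5 + 2 = 12 = 2*6.
// These rewrites change the order (and hence rounding) of operations, which
// is why they are guarded by the unsafe-math / reassoc+nsz checks above.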
13791
13792 // FADD -> FMA combines:
13793 if (SDValue Fused = visitFADDForFMACombine(N)) {
13794 AddToWorklist(Fused.getNode());
13795 return Fused;
13796 }
13797 return SDValue();
13798}
13799
13800SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13801 SDValue Chain = N->getOperand(0);
13802 SDValue N0 = N->getOperand(1);
13803 SDValue N1 = N->getOperand(2);
13804 EVT VT = N->getValueType(0);
13805 EVT ChainVT = N->getValueType(1);
13806 SDLoc DL(N);
13807 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13808
13809 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13810 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13811 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13812 N1, DAG, LegalOperations, ForCodeSize)) {
13813 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13814 {Chain, N0, NegN1});
13815 }
13816
13817 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13818 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13819 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13820 N0, DAG, LegalOperations, ForCodeSize)) {
13821 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13822 {Chain, N1, NegN0});
13823 }
13824 return SDValue();
13825}
13826
13827SDValue DAGCombiner::visitFSUB(SDNode *N) {
13828 SDValue N0 = N->getOperand(0);
13829 SDValue N1 = N->getOperand(1);
13830 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13831 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13832 EVT VT = N->getValueType(0);
13833 SDLoc DL(N);
13834 const TargetOptions &Options = DAG.getTarget().Options;
13835 const SDNodeFlags Flags = N->getFlags();
13836 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13837
13838 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13839 return R;
13840
13841 // fold vector ops
13842 if (VT.isVector())
13843 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13844 return FoldedVOp;
13845
13846 // fold (fsub c1, c2) -> c1-c2
13847 if (N0CFP && N1CFP)
13848 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13849
13850 if (SDValue NewSel = foldBinOpIntoSelect(N))
13851 return NewSel;
13852
13853 // (fsub A, 0) -> A
13854 if (N1CFP && N1CFP->isZero()) {
13855 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13856 Flags.hasNoSignedZeros()) {
13857 return N0;
13858 }
13859 }
13860
13861 if (N0 == N1) {
13862 // (fsub x, x) -> 0.0
13863 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13864 return DAG.getConstantFP(0.0f, DL, VT);
13865 }
13866
13867 // (fsub -0.0, N1) -> -N1 (also allowed with +0.0 when signed zeros are ignored)
13868 if (N0CFP && N0CFP->isZero()) {
13869 if (N0CFP->isNegative() ||
13870 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13871 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13872 // flushed to zero, unless all users treat denorms as zero (DAZ).
13873 // FIXME: This transform will change the sign of a NaN and the behavior
13874 // of a signaling NaN. It is only valid when a NoNaN flag is present.
13875 DenormalMode DenormMode = DAG.getDenormalMode(VT);
13876 if (DenormMode == DenormalMode::getIEEE()) {
13877 if (SDValue NegN1 =
13878 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13879 return NegN1;
13880 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13881 return DAG.getNode(ISD::FNEG, DL, VT, N1);
13882 }
13883 }
13884 }
13885
13886 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13887 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13888 N1.getOpcode() == ISD::FADD) {
13889 // X - (X + Y) -> -Y
13890 if (N0 == N1->getOperand(0))
13891 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13892 // X - (Y + X) -> -Y
13893 if (N0 == N1->getOperand(1))
13894 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13895 }
13896
13897 // fold (fsub A, (fneg B)) -> (fadd A, B)
13898 if (SDValue NegN1 =
13899 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13900 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13901
13902 // FSUB -> FMA combines:
13903 if (SDValue Fused = visitFSUBForFMACombine(N)) {
13904 AddToWorklist(Fused.getNode());
13905 return Fused;
13906 }
13907
13908 return SDValue();
13909}
13910
13911SDValue DAGCombiner::visitFMUL(SDNode *N) {
13912 SDValue N0 = N->getOperand(0);
13913 SDValue N1 = N->getOperand(1);
13914 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13915 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13916 EVT VT = N->getValueType(0);
13917 SDLoc DL(N);
13918 const TargetOptions &Options = DAG.getTarget().Options;
13919 const SDNodeFlags Flags = N->getFlags();
13920 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13921
13922 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13923 return R;
13924
13925 // fold vector ops
13926 if (VT.isVector()) {
13927 // This just handles C1 * C2 for vectors. Other vector folds are below.
13928 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13929 return FoldedVOp;
13930 }
13931
13932 // fold (fmul c1, c2) -> c1*c2
13933 if (N0CFP && N1CFP)
13934 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13935
13936 // canonicalize constant to RHS
13937 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13938 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13939 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13940
13941 if (SDValue NewSel = foldBinOpIntoSelect(N))
13942 return NewSel;
13943
13944 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13945 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13946 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13947 N0.getOpcode() == ISD::FMUL) {
13948 SDValue N00 = N0.getOperand(0);
13949 SDValue N01 = N0.getOperand(1);
13950 // Avoid an infinite loop by making sure that N00 is not a constant
13951 // (the inner multiply has not been constant folded yet).
13952 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13953 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13954 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13955 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13956 }
13957 }
13958
13959 // Match a special-case: we convert X * 2.0 into fadd.
13960 // fmul (fadd X, X), C -> fmul X, 2.0 * C
13961 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13962 N0.getOperand(0) == N0.getOperand(1)) {
13963 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13964 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13965 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13966 }
13967 }
13968
13969 // fold (fmul X, 2.0) -> (fadd X, X)
13970 if (N1CFP && N1CFP->isExactlyValue(+2.0))
13971 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13972
13973 // fold (fmul X, -1.0) -> (fneg X)
13974 if (N1CFP && N1CFP->isExactlyValue(-1.0))
13975 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13976 return DAG.getNode(ISD::FNEG, DL, VT, N0);
13977
13978 // -N0 * -N1 --> N0 * N1
13979 TargetLowering::NegatibleCost CostN0 =
13980 TargetLowering::NegatibleCost::Expensive;
13981 TargetLowering::NegatibleCost CostN1 =
13982 TargetLowering::NegatibleCost::Expensive;
13983 SDValue NegN0 =
13984 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13985 SDValue NegN1 =
13986 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13987 if (NegN0 && NegN1 &&
13988 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13989 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13990 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
13991
13992 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
13993 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
13994 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
13995 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
13996 TLI.isOperationLegal(ISD::FABS, VT)) {
13997 SDValue Select = N0, X = N1;
13998 if (Select.getOpcode() != ISD::SELECT)
13999 std::swap(Select, X);
14000
14001 SDValue Cond = Select.getOperand(0);
14002 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14003 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14004
14005 if (TrueOpnd && FalseOpnd &&
14006 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14007 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14008 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14009 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14010 switch (CC) {
14011 default: break;
14012 case ISD::SETOLT:
14013 case ISD::SETULT:
14014 case ISD::SETOLE:
14015 case ISD::SETULE:
14016 case ISD::SETLT:
14017 case ISD::SETLE:
14018 std::swap(TrueOpnd, FalseOpnd);
14019 LLVM_FALLTHROUGH[[gnu::fallthrough]];
14020 case ISD::SETOGT:
14021 case ISD::SETUGT:
14022 case ISD::SETOGE:
14023 case ISD::SETUGE:
14024 case ISD::SETGT:
14025 case ISD::SETGE:
14026 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14027 TLI.isOperationLegal(ISD::FNEG, VT))
14028 return DAG.getNode(ISD::FNEG, DL, VT,
14029 DAG.getNode(ISD::FABS, DL, VT, X));
14030 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14031 return DAG.getNode(ISD::FABS, DL, VT, X);
14032
14033 break;
14034 }
14035 }
14036 }
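// Why the select fold above is valid (illustrative; it relies on the nnan and
// nsz flags checked beforehand): with the compare X > 0.0, the select picks
// -1.0 exactly when X is positive, so X * select(...) is -X for positive X
// and +X for non-positive X, i.e. -fabs(X); swapping the select arms gives
// fabs(X) instead.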
14037
14038 // FMUL -> FMA combines:
14039 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14040 AddToWorklist(Fused.getNode());
14041 return Fused;
14042 }
14043
14044 return SDValue();
14045}
14046
14047SDValue DAGCombiner::visitFMA(SDNode *N) {
14048 SDValue N0 = N->getOperand(0);
14049 SDValue N1 = N->getOperand(1);
14050 SDValue N2 = N->getOperand(2);
14051 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14052 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14053 EVT VT = N->getValueType(0);
14054 SDLoc DL(N);
14055 const TargetOptions &Options = DAG.getTarget().Options;
14056 // FMA nodes have flags that propagate to the created nodes.
14057 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14058
14059 bool UnsafeFPMath =
14060 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14061
14062 // Constant fold FMA.
14063 if (isa<ConstantFPSDNode>(N0) &&
14064 isa<ConstantFPSDNode>(N1) &&
14065 isa<ConstantFPSDNode>(N2)) {
14066 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14067 }
14068
14069 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14070 TargetLowering::NegatibleCost CostN0 =
14071 TargetLowering::NegatibleCost::Expensive;
14072 TargetLowering::NegatibleCost CostN1 =
14073 TargetLowering::NegatibleCost::Expensive;
14074 SDValue NegN0 =
14075 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14076 SDValue NegN1 =
14077 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14078 if (NegN0 && NegN1 &&
14079 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14080 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14081 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14082
14083 if (UnsafeFPMath) {
14084 if (N0CFP && N0CFP->isZero())
14085 return N2;
14086 if (N1CFP && N1CFP->isZero())
14087 return N2;
14088 }
14089
14090 if (N0CFP && N0CFP->isExactlyValue(1.0))
14091 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14092 if (N1CFP && N1CFP->isExactlyValue(1.0))
14093 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14094
14095 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14096 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14097 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14098 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14099
14100 if (UnsafeFPMath) {
14101 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14102 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14103 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14104 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14105 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14106 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14107 }
14108
14109 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14110 if (N0.getOpcode() == ISD::FMUL &&
14111 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14112 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14113 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14114 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14115 N2);
14116 }
14117 }
14118
14119 // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
14120 if (N1CFP) {
14121 if (N1CFP->isExactlyValue(1.0))
14122 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14123
14124 if (N1CFP->isExactlyValue(-1.0) &&
14125 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14126 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14127 AddToWorklist(RHSNeg.getNode());
14128 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14129 }
14130
14131 // fma (fneg x), K, y -> fma x, -K, y
14132 if (N0.getOpcode() == ISD::FNEG &&
14133 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14134 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14135 ForCodeSize)))) {
14136 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14137 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14138 }
14139 }
14140
14141 if (UnsafeFPMath) {
14142 // (fma x, c, x) -> (fmul x, (c+1))
14143 if (N1CFP && N0 == N2) {
14144 return DAG.getNode(
14145 ISD::FMUL, DL, VT, N0,
14146 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14147 }
14148
14149 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14150 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14151 return DAG.getNode(
14152 ISD::FMUL, DL, VT, N0,
14153 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14154 }
14155 }
14156
14157 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14158 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14159 if (!TLI.isFNegFree(VT))
14160 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14161 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14162 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14163 return SDValue();
14164}
14165
14166// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14167// reciprocal.
14168// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14169// Notice that this is not always beneficial. One reason is different targets
14170// may have different costs for FDIV and FMUL, so sometimes the cost of two
14171// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14172// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14173SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14174 // TODO: Limit this transform based on optsize/minsize - it always creates at
14175 // least 1 extra instruction. But the perf win may be substantial enough
14176 // that only minsize should restrict this.
14177 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14178 const SDNodeFlags Flags = N->getFlags();
14179 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14180 return SDValue();
14181
14182 // Skip if current node is a reciprocal/fneg-reciprocal.
14183 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14184 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14185 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14186 return SDValue();
14187
14188 // Exit early if the target does not want this transform or if there can't
14189 // possibly be enough uses of the divisor to make the transform worthwhile.
14190 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14191
14192 // For splat vectors, scale the number of uses by the splat factor. If we can
14193 // convert the division into a scalar op, that will likely be much faster.
14194 unsigned NumElts = 1;
14195 EVT VT = N->getValueType(0);
14196 if (VT.isVector() && DAG.isSplatValue(N1))
14197 NumElts = VT.getVectorNumElements();
14198
14199 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14200 return SDValue();
14201
14202 // Find all FDIV users of the same divisor.
14203 // Use a set because duplicates may be present in the user list.
14204 SetVector<SDNode *> Users;
14205 for (auto *U : N1->uses()) {
14206 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14207 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14208 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14209 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14210 U->getFlags().hasAllowReassociation() &&
14211 U->getFlags().hasNoSignedZeros())
14212 continue;
14213
14214 // This division is eligible for optimization only if global unsafe math
14215 // is enabled or if this division allows reciprocal formation.
14216 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14217 Users.insert(U);
14218 }
14219 }
14220
14221 // Now that we have the actual number of divisor uses, make sure it meets
14222 // the minimum threshold specified by the target.
14223 if ((Users.size() * NumElts) < MinUses)
14224 return SDValue();
14225
14226 SDLoc DL(N);
14227 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14228 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14229
14230 // Dividend / Divisor -> Dividend * Reciprocal
14231 for (auto *U : Users) {
14232 SDValue Dividend = U->getOperand(0);
14233 if (Dividend != FPOne) {
14234 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14235 Reciprocal, Flags);
14236 CombineTo(U, NewNode);
14237 } else if (U != Reciprocal.getNode()) {
14238 // In the absence of fast-math-flags, this user node is always the
14239 // same node as Reciprocal, but with FMF they may be different nodes.
14240 CombineTo(U, Reciprocal);
14241 }
14242 }
14243 return SDValue(N, 0); // N was replaced.
14244}
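// --- Illustrative sketch (not part of the original source): the
// repeated-divisor rewrite trades N divisions for one division plus N
// multiplies. A hypothetical scalar version of the shape being matched:
static void repeatedDivisorSketch(double a, double b, double d,
                                  double &x, double &y) {
  // Before: two FDIVs sharing the divisor d:  x = a / d;  y = b / d;
  // After: one FDIV forms the reciprocal, and the divisions become FMULs.
  double recip = 1.0 / d;
  x = a * recip;
  y = b * recip;
}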
14245
14246SDValue DAGCombiner::visitFDIV(SDNode *N) {
14247 SDValue N0 = N->getOperand(0);
14248 SDValue N1 = N->getOperand(1);
14249 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14250 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14251 EVT VT = N->getValueType(0);
14252 SDLoc DL(N);
14253 const TargetOptions &Options = DAG.getTarget().Options;
14254 SDNodeFlags Flags = N->getFlags();
14255 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14256
14257 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14258 return R;
14259
14260 // fold vector ops
14261 if (VT.isVector())
14262 if (SDValue FoldedVOp = SimplifyVBinOp(N))
14263 return FoldedVOp;
14264
14265 // fold (fdiv c1, c2) -> c1/c2
14266 if (N0CFP && N1CFP)
14267 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14268
14269 if (SDValue NewSel = foldBinOpIntoSelect(N))
14270 return NewSel;
14271
14272 if (SDValue V = combineRepeatedFPDivisors(N))
14273 return V;
14274
14275 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14276 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14277 if (N1CFP) {
14278 // Compute the reciprocal 1.0 / c2.
14279 const APFloat &N1APF = N1CFP->getValueAPF();
14280 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14281 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14282 // Only do the transform if the reciprocal is a legal fp immediate that
14283 // isn't too nasty (eg NaN, denormal, ...).
14284 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14285 (!LegalOperations ||
14286 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14287 // backend)... we should handle this gracefully after Legalize.
14288 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14289 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14290 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14291 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14292 DAG.getConstantFP(Recip, DL, VT));
14293 }
14294
14295 // If this FDIV is part of a reciprocal square root, it may be folded
14296 // into a target-specific square root estimate instruction.
14297 if (N1.getOpcode() == ISD::FSQRT) {
14298 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14299 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14300 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14301 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14302 if (SDValue RV =
14303 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14304 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14305 AddToWorklist(RV.getNode());
14306 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14307 }
14308 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14309 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14310 if (SDValue RV =
14311 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14312 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14313 AddToWorklist(RV.getNode());
14314 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14315 }
14316 } else if (N1.getOpcode() == ISD::FMUL) {
14317 // Look through an FMUL. Even though this won't remove the FDIV directly,
14318 // it's still worthwhile to get rid of the FSQRT if possible.
14319 SDValue Sqrt, Y;
14320 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14321 Sqrt = N1.getOperand(0);
14322 Y = N1.getOperand(1);
14323 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14324 Sqrt = N1.getOperand(1);
14325 Y = N1.getOperand(0);
14326 }
14327 if (Sqrt.getNode()) {
14328 // If the other multiply operand is known positive, pull it into the
14329 // sqrt. That will eliminate the division if we convert to an estimate.
14330 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14331 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14332 SDValue A;
14333 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14334 A = Y.getOperand(0);
14335 else if (Y == Sqrt.getOperand(0))
14336 A = Y;
14337 if (A) {
14338 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14339 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14340 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14341 SDValue AAZ =
14342 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14343 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14344 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14345
14346 // Estimate creation failed. Clean up speculatively created nodes.
14347 recursivelyDeleteUnusedNodes(AAZ.getNode());
14348 }
14349 }
14350
14351 // We found a FSQRT, so try to make this fold:
14352 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14353 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14354 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14355 AddToWorklist(Div.getNode());
14356 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14357 }
14358 }
14359 }
14360
14361 // Fold into a reciprocal estimate and multiply instead of a real divide.
14362 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14363 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14364 return RV;
14365 }
14366
14367 // Fold X/Sqrt(X) -> Sqrt(X)
14368 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14369 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14370 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14371 return N1;
14372
14373 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14374 TargetLowering::NegatibleCost CostN0 =
14375 TargetLowering::NegatibleCost::Expensive;
14376 TargetLowering::NegatibleCost CostN1 =
14377 TargetLowering::NegatibleCost::Expensive;
14378 SDValue NegN0 =
14379 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14380 SDValue NegN1 =
14381 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14382 if (NegN0 && NegN1 &&
14383 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14384 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14385 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14386
14387 return SDValue();
14388}
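// --- Illustrative sketch (not part of the original source): the
// reciprocal-sqrt folds above turn division by a square root into
// multiplication by an rsqrt estimate. Hypothetical scalar shape, written with
// __builtin_sqrt so no extra headers are assumed:
static double rsqrtFoldSketch(double x, double y, double z) {
  // Before: x / (y * sqrt(z))  -- the FDIV consumes the full product.
  // After:  x * (rsqrt(z) / y) -- the remaining FDIV only feeds the estimate,
  //                               which buildRsqrtEstimate can then expand.
  double rsqrt = 1.0 / __builtin_sqrt(z);
  return x * (rsqrt / y);
}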
14389
14390SDValue DAGCombiner::visitFREM(SDNode *N) {
14391 SDValue N0 = N->getOperand(0);
14392 SDValue N1 = N->getOperand(1);
14393 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14394 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14395 EVT VT = N->getValueType(0);
14396 SDNodeFlags Flags = N->getFlags();
14397 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14398
14399 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14400 return R;
14401
14402 // fold (frem c1, c2) -> fmod(c1,c2)
14403 if (N0CFP && N1CFP)
14404 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14405
14406 if (SDValue NewSel = foldBinOpIntoSelect(N))
14407 return NewSel;
14408
14409 return SDValue();
14410}
14411
14412SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14413 SDNodeFlags Flags = N->getFlags();
14414 const TargetOptions &Options = DAG.getTarget().Options;
14415
14416 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14417 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14418 if (!Flags.hasApproximateFuncs() ||
14419 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14420 return SDValue();
14421
14422 SDValue N0 = N->getOperand(0);
14423 if (TLI.isFsqrtCheap(N0, DAG))
14424 return SDValue();
14425
14426 // FSQRT nodes have flags that propagate to the created nodes.
14427 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14428 // transform the fdiv, we may produce a sub-optimal estimate sequence
14429 // because the reciprocal calculation may not have to filter out a
14430 // 0.0 input.
14431 return buildSqrtEstimate(N0, Flags);
14432}
14433
14434/// copysign(x, fp_extend(y)) -> copysign(x, y)
14435/// copysign(x, fp_round(y)) -> copysign(x, y)
14436static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14437 SDValue N1 = N->getOperand(1);
14438 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14439 N1.getOpcode() == ISD::FP_ROUND)) {
14440 EVT N1VT = N1->getValueType(0);
14441 EVT N1Op0VT = N1->getOperand(0).getValueType();
14442
14443 // Always fold no-op FP casts.
14444 if (N1VT == N1Op0VT)
14445 return true;
14446
14447 // Do not optimize out type conversion of f128 type yet.
14448 // For some targets like x86_64, configuration is changed to keep one f128
14449 // value in one SSE register, but instruction selection cannot handle
14450 // FCOPYSIGN on SSE registers yet.
14451 if (N1Op0VT == MVT::f128)
14452 return false;
14453
14454 // Avoid mismatched vector operand types, for better instruction selection.
14455 if (N1Op0VT.isVector())
14456 return false;
14457
14458 return true;
14459 }
14460 return false;
14461}
14462
14463SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14464 SDValue N0 = N->getOperand(0);
14465 SDValue N1 = N->getOperand(1);
14466 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14467 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14468 EVT VT = N->getValueType(0);
14469
14470 if (N0CFP && N1CFP) // Constant fold
14471 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14472
14473 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14474 const APFloat &V = N1C->getValueAPF();
14475 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14476 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14477 if (!V.isNegative()) {
14478 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14479 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14480 } else {
14481 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14482 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14483 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14484 }
14485 }
14486
14487 // copysign(fabs(x), y) -> copysign(x, y)
14488 // copysign(fneg(x), y) -> copysign(x, y)
14489 // copysign(copysign(x,z), y) -> copysign(x, y)
14490 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14491 N0.getOpcode() == ISD::FCOPYSIGN)
14492 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14493
14494 // copysign(x, abs(y)) -> abs(x)
14495 if (N1.getOpcode() == ISD::FABS)
14496 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14497
14498 // copysign(x, copysign(y,z)) -> copysign(x, z)
14499 if (N1.getOpcode() == ISD::FCOPYSIGN)
14500 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14501
14502 // copysign(x, fp_extend(y)) -> copysign(x, y)
14503 // copysign(x, fp_round(y)) -> copysign(x, y)
14504 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14505 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14506
14507 return SDValue();
14508}
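// --- Illustrative sketch (not part of the original source): the copysign
// folds above only ever inspect the sign of the second operand, so wrappers
// that cannot change that sign (fabs, fneg, another copysign, fp casts) can be
// peeled off. Hypothetical scalar illustration via compiler builtins:
static double copysignFoldSketch(double x, double y) {
  // copysign(fabs(x), y) reduces to copysign(x, y): only y's sign bit matters.
  return __builtin_copysign(__builtin_fabs(x), y);
}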
14509
14510SDValue DAGCombiner::visitFPOW(SDNode *N) {
14511 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14512 if (!ExponentC)
14513 return SDValue();
14514 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14515
14516 // Try to convert x ** (1/3) into cube root.
14517 // TODO: Handle the various flavors of long double.
14518 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14519 // Some range near 1/3 should be fine.
14520 EVT VT = N->getValueType(0);
14521 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14522 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14523 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14524 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14525 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
14526 // For regular numbers, rounding may cause the results to differ.
14527 // Therefore, we require { nsz ninf nnan afn } for this transform.
14528 // TODO: We could select out the special cases if we don't have nsz/ninf.
14529 SDNodeFlags Flags = N->getFlags();
14530 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14531 !Flags.hasApproximateFuncs())
14532 return SDValue();
14533
14534 // Do not create a cbrt() libcall if the target does not have it, and do not
14535 // turn a pow that has lowering support into a cbrt() libcall.
14536 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14537 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14538 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14539 return SDValue();
14540
14541 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14542 }
14543
14544 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14545 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14546 // TODO: This could be extended (using a target hook) to handle smaller
14547 // power-of-2 fractional exponents.
14548 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14549 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14550 if (ExponentIs025 || ExponentIs075) {
14551 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14552 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14553 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14554 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14555 // For regular numbers, rounding may cause the results to differ.
14556 // Therefore, we require { nsz ninf afn } for this transform.
14557 // TODO: We could select out the special cases if we don't have nsz/ninf.
14558 SDNodeFlags Flags = N->getFlags();
14559
14560 // We only need no signed zeros for the 0.25 case.
14561 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14562 !Flags.hasApproximateFuncs())
14563 return SDValue();
14564
14565 // Don't double the number of libcalls. We are trying to inline fast code.
14566 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14567 return SDValue();
14568
14569 // Assume that libcalls are the smallest code.
14570 // TODO: This restriction should probably be lifted for vectors.
14571 if (ForCodeSize)
14572 return SDValue();
14573
14574 // pow(X, 0.25) --> sqrt(sqrt(X))
14575 SDLoc DL(N);
14576 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14577 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14578 if (ExponentIs025)
14579 return SqrtSqrt;
14580 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14581 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14582 }
14583
14584 return SDValue();
14585}
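// --- Illustrative sketch (not part of the original source): the pow folds
// above replace a libcall with one or two square roots. Hypothetical scalar
// shape of the 0.25 and 0.75 cases:
static double powFractionSketch(double x, bool threeQuarters) {
  // pow(x, 0.25) --> sqrt(sqrt(x)); pow(x, 0.75) --> sqrt(x) * sqrt(sqrt(x)).
  // Valid only under the nsz/ninf/afn flags checked above, since -0.0 and
  // infinities behave differently on the two sides.
  double s = __builtin_sqrt(x);
  double ss = __builtin_sqrt(s);
  return threeQuarters ? s * ss : ss;
}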
14586
14587static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14588 const TargetLowering &TLI) {
14589 // This optimization is guarded by a function attribute because it may produce
14590 // unexpected results. I.e., programs may be relying on the platform-specific
14591 // undefined behavior when the float-to-int conversion overflows.
14592 const Function &F = DAG.getMachineFunction().getFunction();
14593 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14594 if (StrictOverflow.getValueAsString().equals("false"))
14595 return SDValue();
14596
14597 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14598 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14599 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14600 // conversions would return +0.0.
14601 // FIXME: We should be able to use node-level FMF here.
14602 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14603 EVT VT = N->getValueType(0);
14604 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14605 !DAG.getTarget().Options.NoSignedZerosFPMath)
14606 return SDValue();
14607
14608 // fptosi/fptoui round towards zero, so converting from FP to integer and
14609 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14610 SDValue N0 = N->getOperand(0);
14611 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14612 N0.getOperand(0).getValueType() == VT)
14613 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14614
14615 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14616 N0.getOperand(0).getValueType() == VT)
14617 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14618
14619 return SDValue();
14620}
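// --- Illustrative sketch (not part of the original source): the fold above
// treats an int round-trip as truncation toward zero. Hypothetical scalar
// shape, assuming the value stays in range:
static double intRoundTripSketch(double x) {
  // (sint_to_fp (fp_to_sint x)) behaves like ftrunc(x) as long as the
  // conversion cannot overflow and -0.0 may be treated as +0.0, which is why
  // the combine requires legal FTRUNC and NoSignedZerosFPMath.
  return (double)(long long)x;
}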
14621
14622SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14623 SDValue N0 = N->getOperand(0);
14624 EVT VT = N->getValueType(0);
14625 EVT OpVT = N0.getValueType();
14626
14627 // [us]itofp(undef) = 0, because the result value is bounded.
14628 if (N0.isUndef())
14629 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14630
14631 // fold (sint_to_fp c1) -> c1fp
14632 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14633 // ...but only if the target supports immediate floating-point values
14634 (!LegalOperations ||
14635 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14636 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14637
14638 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14639 // but UINT_TO_FP is legal on this target, try to convert.
14640 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14641 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14642 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14643 if (DAG.SignBitIsZero(N0))
14644 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14645 }
14646
14647 // The next optimizations are desirable only if SELECT_CC can be lowered.
14648 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14649 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14650 !VT.isVector() &&
14651 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14652 SDLoc DL(N);
14653 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14654 DAG.getConstantFP(0.0, DL, VT));
14655 }
14656
14657 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14658 // (select (setcc x, y, cc), 1.0, 0.0)
14659 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14660 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14661 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14662 SDLoc DL(N);
14663 return DAG.getSelect(DL, VT, N0.getOperand(0),
14664 DAG.getConstantFP(1.0, DL, VT),
14665 DAG.getConstantFP(0.0, DL, VT));
14666 }
14667
14668 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14669 return FTrunc;
14670
14671 return SDValue();
14672}
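// --- Illustrative sketch (not part of the original source): the setcc folds
// above convert an int-to-FP cast of a boolean into a select between FP
// constants. Hypothetical scalar shape (a signed i1 "true" reads as -1, hence
// the -1.0 arm):
static double sintToFPOfSetCCSketch(int x, int y) {
  // (sint_to_fp (setcc x, y, setlt)) --> select (setcc x, y, setlt), -1.0, 0.0
  return (x < y) ? -1.0 : 0.0;
}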
14673
14674SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14675 SDValue N0 = N->getOperand(0);
14676 EVT VT = N->getValueType(0);
14677 EVT OpVT = N0.getValueType();
14678
14679 // [us]itofp(undef) = 0, because the result value is bounded.
14680 if (N0.isUndef())
14681 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14682
14683 // fold (uint_to_fp c1) -> c1fp
14684 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14685 // ...but only if the target supports immediate floating-point values
14686 (!LegalOperations ||
14687 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14688 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14689
14690 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14691 // but SINT_TO_FP is legal on this target, try to convert.
14692 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14693 hasOperation(ISD::SINT_TO_FP, OpVT)) {
14694 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14695 if (DAG.SignBitIsZero(N0))
14696 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14697 }
14698
14699 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14700 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14701 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14702 SDLoc DL(N);
14703 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14704 DAG.getConstantFP(0.0, DL, VT));
14705 }
14706
14707 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14708 return FTrunc;
14709
14710 return SDValue();
14711}
14712
14713 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14714static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14715 SDValue N0 = N->getOperand(0);
14716 EVT VT = N->getValueType(0);
14717
14718 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14719 return SDValue();
14720
14721 SDValue Src = N0.getOperand(0);
14722 EVT SrcVT = Src.getValueType();
14723 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14724 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14725
14726 // We can safely assume the conversion won't overflow the output range,
14727 // because (for example) (uint8_t)18293.f is undefined behavior.
14728
14729 // Since we can assume the conversion won't overflow, our decision as to
14730 // whether the input will fit in the float should depend on the minimum
14731 // of the input range and output range.
14732
14733 // This means this is also safe for a signed input and unsigned output, since
14734 // a negative input would lead to undefined behavior.
14735 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14736 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14737 unsigned ActualSize = std::min(InputSize, OutputSize);
14738 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14739
14740 // We can only fold away the float conversion if the input range can be
14741 // represented exactly in the float range.
14742 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14743 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14744 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14745 : ISD::ZERO_EXTEND;
14746 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14747 }
14748 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14749 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14750 return DAG.getBitcast(VT, Src);
14751 }
14752 return SDValue();
14753}
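// --- Illustrative sketch (not part of the original source): the precision
// check above asks whether every input value survives the FP round-trip.
// Hypothetical example: a signed 8-bit value has 7 significant bits, well
// within float's 24-bit significand, so the conversion pair is just a resize:
static int fpRoundTripSketch(signed char c) {
  // (fp_to_sint (sint_to_fp i8)) widened to i32 folds to a sign extension.
  return (int)(float)c;
}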
14754
14755SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14756 SDValue N0 = N->getOperand(0);
14757 EVT VT = N->getValueType(0);
14758
14759 // fold (fp_to_sint undef) -> undef
14760 if (N0.isUndef())
14761 return DAG.getUNDEF(VT);
14762
14763 // fold (fp_to_sint c1fp) -> c1
14764 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14765 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14766
14767 return FoldIntToFPToInt(N, DAG);
14768}
14769
14770SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14771 SDValue N0 = N->getOperand(0);
14772 EVT VT = N->getValueType(0);
14773
14774 // fold (fp_to_uint undef) -> undef
14775 if (N0.isUndef())
14776 return DAG.getUNDEF(VT);
14777
14778 // fold (fp_to_uint c1fp) -> c1
14779 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14780 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14781
14782 return FoldIntToFPToInt(N, DAG);
14783}
14784
14785SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14786 SDValue N0 = N->getOperand(0);
14787 SDValue N1 = N->getOperand(1);
14788 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14789 EVT VT = N->getValueType(0);
14790
14791 // fold (fp_round c1fp) -> c1fp
14792 if (N0CFP)
14793 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14794
14795 // fold (fp_round (fp_extend x)) -> x
14796 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14797 return N0.getOperand(0);
14798
14799 // fold (fp_round (fp_round x)) -> (fp_round x)
14800 if (N0.getOpcode() == ISD::FP_ROUND) {
14801 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14802 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14803
14804 // Skip this folding if it results in an fp_round from f80 to f16.
14805 //
14806 // f80 to f16 always generates an expensive (and as yet, unimplemented)
14807 // libcall to __truncxfhf2 instead of selecting native f16 conversion
14808 // instructions from f32 or f64. Moreover, the first (value-preserving)
14809 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14810 // x86.
14811 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14812 return SDValue();
14813
14814 // If the first fp_round isn't a value preserving truncation, it might
14815 // introduce a tie in the second fp_round, that wouldn't occur in the
14816 // single-step fp_round we want to fold to.
14817 // In other words, double rounding isn't the same as rounding.
14818 // Also, this is a value preserving truncation iff both fp_round's are.
14819 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14820 SDLoc DL(N);
14821 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14822 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14823 }
14824 }
14825
14826 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14827 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14828 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14829 N0.getOperand(0), N1);
14830 AddToWorklist(Tmp.getNode());
14831 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14832 Tmp, N0.getOperand(1));
14833 }
14834
14835 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14836 return NewVSel;
14837
14838 return SDValue();
14839}
14840
14841SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14842 SDValue N0 = N->getOperand(0);
14843 EVT VT = N->getValueType(0);
14844
14845 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
14846 if (N->hasOneUse() &&
14847 N->use_begin()->getOpcode() == ISD::FP_ROUND)
14848 return SDValue();
14849
14850 // fold (fp_extend c1fp) -> c1fp
14851 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14852 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14853
14854 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14855 if (N0.getOpcode() == ISD::FP16_TO_FP &&
14856 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14857 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14858
14859 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14860 // value of X.
14861 if (N0.getOpcode() == ISD::FP_ROUND
14862 && N0.getConstantOperandVal(1) == 1) {
14863 SDValue In = N0.getOperand(0);
14864 if (In.getValueType() == VT) return In;
14865 if (VT.bitsLT(In.getValueType()))
14866 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14867 In, N0.getOperand(1));
14868 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14869 }
14870
14871 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14872 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14873 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14874 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14875 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14876 LN0->getChain(),
14877 LN0->getBasePtr(), N0.getValueType(),
14878 LN0->getMemOperand());
14879 CombineTo(N, ExtLoad);
14880 CombineTo(N0.getNode(),
14881 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14882 N0.getValueType(), ExtLoad,
14883 DAG.getIntPtrConstant(1, SDLoc(N0))),
14884 ExtLoad.getValue(1));
14885 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14886 }
14887
14888 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14889 return NewVSel;
14890
14891 return SDValue();
14892}
14893
14894SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14895 SDValue N0 = N->getOperand(0);
14896 EVT VT = N->getValueType(0);
14897
14898 // fold (fceil c1) -> fceil(c1)
14899 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14900 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14901
14902 return SDValue();
14903}
14904
14905SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14906 SDValue N0 = N->getOperand(0);
14907 EVT VT = N->getValueType(0);
14908
14909 // fold (ftrunc c1) -> ftrunc(c1)
14910 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14911 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14912
14913 // fold ftrunc (known rounded int x) -> x
14914 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14915 // likely to be generated to extract integer from a rounded floating value.
14916 switch (N0.getOpcode()) {
14917 default: break;
14918 case ISD::FRINT:
14919 case ISD::FTRUNC:
14920 case ISD::FNEARBYINT:
14921 case ISD::FFLOOR:
14922 case ISD::FCEIL:
14923 return N0;
14924 }
14925
14926 return SDValue();
14927}
14928
14929SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14930 SDValue N0 = N->getOperand(0);
14931 EVT VT = N->getValueType(0);
14932
14933 // fold (ffloor c1) -> ffloor(c1)
14934 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14935 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14936
14937 return SDValue();
14938}
14939
14940SDValue DAGCombiner::visitFNEG(SDNode *N) {
14941 SDValue N0 = N->getOperand(0);
14942 EVT VT = N->getValueType(0);
14943 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14944
14945 // Constant fold FNEG.
14946 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14947 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14948
14949 if (SDValue NegN0 =
14950 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14951 return NegN0;
14952
14953 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14954 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14955 // know it was called from a context with a nsz flag if the input fsub does
14956 // not.
14957 if (N0.getOpcode() == ISD::FSUB &&
14958 (DAG.getTarget().Options.NoSignedZerosFPMath ||
14959 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14960 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14961 N0.getOperand(0));
14962 }
14963
14964 if (SDValue Cast = foldSignChangeInBitcast(N))
14965 return Cast;
14966
14967 return SDValue();
14968}
14969
14970static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14971 APFloat (*Op)(const APFloat &, const APFloat &)) {
14972 SDValue N0 = N->getOperand(0);
14973 SDValue N1 = N->getOperand(1);
14974 EVT VT = N->getValueType(0);
14975 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14976 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14977 const SDNodeFlags Flags = N->getFlags();
14978 unsigned Opc = N->getOpcode();
14979 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14980 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14981 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14982
14983 if (N0CFP && N1CFP) {
14984 const APFloat &C0 = N0CFP->getValueAPF();
14985 const APFloat &C1 = N1CFP->getValueAPF();
14986 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14987 }
14988
14989 // Canonicalize to constant on RHS.
14990 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14991 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14992 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14993
14994 if (N1CFP) {
14995 const APFloat &AF = N1CFP->getValueAPF();
14996
14997 // minnum(X, nan) -> X
14998 // maxnum(X, nan) -> X
14999 // minimum(X, nan) -> nan
15000 // maximum(X, nan) -> nan
15001 if (AF.isNaN())
15002 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15003
15004 // In the following folds, inf can be replaced with the largest finite
15005 // float, if the ninf flag is set.
15006 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15007 // minnum(X, -inf) -> -inf
15008 // maxnum(X, +inf) -> +inf
15009 // minimum(X, -inf) -> -inf if nnan
15010 // maximum(X, +inf) -> +inf if nnan
15011 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15012 return N->getOperand(1);
15013
15014 // minnum(X, +inf) -> X if nnan
15015 // maxnum(X, -inf) -> X if nnan
15016 // minimum(X, +inf) -> X
15017 // maximum(X, -inf) -> X
15018 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15019 return N->getOperand(0);
15020 }
15021 }
15022
15023 return SDValue();
15024}
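// --- Illustrative sketch (not part of the original source): the constant-RHS
// folds above follow IEEE-754 minNum/maxNum semantics, where a quiet NaN acts
// as "missing data". Hypothetical scalar illustration via compiler builtins:
static double minnumNaNSketch(double x) {
  // minnum(x, NaN) --> x, whereas the FMINIMUM flavour would propagate NaN.
  return __builtin_fmin(x, __builtin_nan(""));
}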
15025
15026SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
15027 return visitFMinMax(DAG, N, minnum);
15028}
15029
15030SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
15031 return visitFMinMax(DAG, N, maxnum);
15032}
15033
15034SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
15035 return visitFMinMax(DAG, N, minimum);
15036}
15037
15038SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
15039 return visitFMinMax(DAG, N, maximum);
15040}
15041
15042SDValue DAGCombiner::visitFABS(SDNode *N) {
15043 SDValue N0 = N->getOperand(0);
15044 EVT VT = N->getValueType(0);
15045
15046 // fold (fabs c1) -> fabs(c1)
15047 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15048 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15049
15050 // fold (fabs (fabs x)) -> (fabs x)
15051 if (N0.getOpcode() == ISD::FABS)
15052 return N->getOperand(0);
15053
15054 // fold (fabs (fneg x)) -> (fabs x)
15055 // fold (fabs (fcopysign x, y)) -> (fabs x)
15056 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15057 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15058
15059 if (SDValue Cast = foldSignChangeInBitcast(N))
15060 return Cast;
15061
15062 return SDValue();
15063}
15064
15065SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15066 SDValue Chain = N->getOperand(0);
15067 SDValue N1 = N->getOperand(1);
15068 SDValue N2 = N->getOperand(2);
15069
15070 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15071 // nondeterministic jumps).
15072 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15073 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15074 N1->getOperand(0), N2);
15075 }
15076
15077 // If N is a constant we could fold this into a fallthrough or unconditional
15078 // branch. However that doesn't happen very often in normal code, because
15079 // Instcombine/SimplifyCFG should have handled the available opportunities.
15080 // If we did this folding here, it would be necessary to update the
15081 // MachineBasicBlock CFG, which is awkward.
15082
15083 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15084 // on the target.
15085 if (N1.getOpcode() == ISD::SETCC &&
15086 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15087 N1.getOperand(0).getValueType())) {
15088 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15089 Chain, N1.getOperand(2),
15090 N1.getOperand(0), N1.getOperand(1), N2);
15091 }
15092
15093 if (N1.hasOneUse()) {
15094 // rebuildSetCC calls visitXor which may change the Chain when there is a
15095 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15096 HandleSDNode ChainHandle(Chain);
15097 if (SDValue NewN1 = rebuildSetCC(N1))
15098 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15099 ChainHandle.getValue(), NewN1, N2);
15100 }
15101
15102 return SDValue();
15103}
15104
15105SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15106 if (N.getOpcode() == ISD::SRL ||
15107 (N.getOpcode() == ISD::TRUNCATE &&
15108 (N.getOperand(0).hasOneUse() &&
15109 N.getOperand(0).getOpcode() == ISD::SRL))) {
15110 // Look past the truncate.
15111 if (N.getOpcode() == ISD::TRUNCATE)
15112 N = N.getOperand(0);
15113
15114 // Match this pattern so that we can generate simpler code:
15115 //
15116 // %a = ...
15117 // %b = and i32 %a, 2
15118 // %c = srl i32 %b, 1
15119 // brcond i32 %c ...
15120 //
15121 // into
15122 //
15123 // %a = ...
15124 // %b = and i32 %a, 2
15125 // %c = setcc eq %b, 0
15126 // brcond %c ...
15127 //
15128 // This applies only when the AND constant value has one bit set and the
15129 // SRL constant is equal to the log2 of the AND constant. The back-end is
15130 // smart enough to convert the result into a TEST/JMP sequence.
15131 SDValue Op0 = N.getOperand(0);
15132 SDValue Op1 = N.getOperand(1);
15133
15134 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15135 SDValue AndOp1 = Op0.getOperand(1);
15136
15137 if (AndOp1.getOpcode() == ISD::Constant) {
15138 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15139
15140 if (AndConst.isPowerOf2() &&
15141 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15142 SDLoc DL(N);
15143 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15144 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15145 ISD::SETNE);
15146 }
15147 }
15148 }
15149 }
15150
15151 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15152 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15153 if (N.getOpcode() == ISD::XOR) {
15154 // Because we may call this on a speculatively constructed
15155 // SimplifiedSetCC Node, we need to simplify this node first.
15156 // Ideally this should be folded into SimplifySetCC and not
15157 // here. For now, grab a handle to N so we don't lose it from
15158 // replacements internal to the visit.
15159 HandleSDNode XORHandle(N);
15160 while (N.getOpcode() == ISD::XOR) {
15161 SDValue Tmp = visitXOR(N.getNode());
15162 // No simplification done.
15163 if (!Tmp.getNode())
15164 break;
15165 // Returning N is a form of in-visit replacement that may invalidate
15166 // N. Grab the value from the handle.
15167 if (Tmp.getNode() == N.getNode())
15168 N = XORHandle.getValue();
15169 else // Node simplified. Try simplifying again.
15170 N = Tmp;
15171 }
15172
15173 if (N.getOpcode() != ISD::XOR)
15174 return N;
15175
15176 SDValue Op0 = N->getOperand(0);
15177 SDValue Op1 = N->getOperand(1);
15178
15179 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15180 bool Equal = false;
15181 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15182 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15183 Op0.getValueType() == MVT::i1) {
15184 N = Op0;
15185 Op0 = N->getOperand(0);
15186 Op1 = N->getOperand(1);
15187 Equal = true;
15188 }
15189
15190 EVT SetCCVT = N.getValueType();
15191 if (LegalTypes)
15192 SetCCVT = getSetCCResultType(SetCCVT);
15193 // Replace the uses of XOR with SETCC
15194 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15195 Equal ? ISD::SETEQ : ISD::SETNE);
15196 }
15197 }
15198
15199 return SDValue();
15200}
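// --- Illustrative sketch (not part of the original source): the
// (and; srl; brcond) pattern handled above is simply a branch on one extracted
// bit. Hypothetical C-level shape of the rewrite:
static int srlOfAndSketch(unsigned a) {
  // Before: branch on ((a & 2u) >> 1).
  // After:  branch on ((a & 2u) != 0), which lowers to a TEST/JMP pair.
  return ((a & 2u) != 0) ? 1 : 0;
}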
15201
15202// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15203//
15204SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15205 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15206 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15207
15208 // If N is a constant we could fold this into a fallthrough or unconditional
15209 // branch. However that doesn't happen very often in normal code, because
15210 // Instcombine/SimplifyCFG should have handled the available opportunities.
15211 // If we did this folding here, it would be necessary to update the
15212 // MachineBasicBlock CFG, which is awkward.
15213
15214 // Use SimplifySetCC to simplify SETCC's.
15215 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15216 CondLHS, CondRHS, CC->get(), SDLoc(N),
15217 false);
15218 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15219
15220 // fold to a simpler setcc
15221 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15222 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15223 N->getOperand(0), Simp.getOperand(2),
15224 Simp.getOperand(0), Simp.getOperand(1),
15225 N->getOperand(4));
15226
15227 return SDValue();
15228}
15229
15230static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15231 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15232 const TargetLowering &TLI) {
15233 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15234 if (LD->isIndexed())
15235 return false;
15236 EVT VT = LD->getMemoryVT();
15237 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15238 return false;
15239 Ptr = LD->getBasePtr();
15240 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15241 if (ST->isIndexed())
15242 return false;
15243 EVT VT = ST->getMemoryVT();
15244 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15245 return false;
15246 Ptr = ST->getBasePtr();
15247 IsLoad = false;
15248 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15249 if (LD->isIndexed())
15250 return false;
15251 EVT VT = LD->getMemoryVT();
15252 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15253 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15254 return false;
15255 Ptr = LD->getBasePtr();
15256 IsMasked = true;
15257 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15258 if (ST->isIndexed())
15259 return false;
15260 EVT VT = ST->getMemoryVT();
15261 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15262 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15263 return false;
15264 Ptr = ST->getBasePtr();
15265 IsLoad = false;
15266 IsMasked = true;
15267 } else {
15268 return false;
15269 }
15270 return true;
15271}
15272
15273/// Try turning a load/store into a pre-indexed load/store when the base
15274/// pointer is an add or subtract and it has other uses besides the load/store.
15275/// After the transformation, the new indexed load/store has effectively folded
15276/// the add/subtract in and all of its other uses are redirected to the
15277/// new load/store.
15278bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15279 if (Level < AfterLegalizeDAG)
15280 return false;
15281
15282 bool IsLoad = true;
15283 bool IsMasked = false;
15284 SDValue Ptr;
15285 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15286 Ptr, TLI))
15287 return false;
15288
15289 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15290 // out. There is no reason to make this a preinc/predec.
15291 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15292 Ptr.getNode()->hasOneUse())
15293 return false;
15294
15295 // Ask the target to do addressing mode selection.
15296 SDValue BasePtr;
15297 SDValue Offset;
15298 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15299 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15300 return false;
15301
15302 // Backends without true r+i pre-indexed forms may need to pass a
15303 // constant base with a variable offset so that constant coercion
15304 // will work with the patterns in canonical form.
15305 bool Swapped = false;
15306 if (isa<ConstantSDNode>(BasePtr)) {
15307 std::swap(BasePtr, Offset);
15308 Swapped = true;
15309 }
15310
15311 // Don't create an indexed load / store with zero offset.
15312 if (isNullConstant(Offset))
15313 return false;
15314
15315 // Try turning it into a pre-indexed load / store except when:
15316 // 1) The new base ptr is a frame index.
15317 // 2) If N is a store and the new base ptr is either the same as or is a
15318 // predecessor of the value being stored.
15319 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15320 // that would create a cycle.
15321 // 4) All uses are load / store ops that use it as old base ptr.
15322
15323 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15324 // (plus the implicit offset) to a register to preinc anyway.
15325 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15326 return false;
15327
15328 // Check #2.
15329 if (!IsLoad) {
15330 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15331 : cast<StoreSDNode>(N)->getValue();
15332
15333 // Would require a copy.
15334 if (Val == BasePtr)
15335 return false;
15336
15337 // Would create a cycle.
15338 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15339 return false;
15340 }
15341
15342 // Caches for hasPredecessorHelper.
15343 SmallPtrSet<const SDNode *, 32> Visited;
15344 SmallVector<const SDNode *, 16> Worklist;
15345 Worklist.push_back(N);
15346
15347 // If the offset is a constant, there may be other adds of constants that
15348 // can be folded with this one. We should do this to avoid having to keep
15349 // a copy of the original base pointer.
15350 SmallVector<SDNode *, 16> OtherUses;
15351 if (isa<ConstantSDNode>(Offset))
15352 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15353 UE = BasePtr.getNode()->use_end();
15354 UI != UE; ++UI) {
15355 SDUse &Use = UI.getUse();
15356 // Skip the use that is Ptr and uses of other results from BasePtr's
15357 // node (important for nodes that return multiple results).
15358 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15359 continue;
15360
15361 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15362 continue;
15363
15364 if (Use.getUser()->getOpcode() != ISD::ADD &&
15365 Use.getUser()->getOpcode() != ISD::SUB) {
15366 OtherUses.clear();
15367 break;
15368 }
15369
15370 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15371 if (!isa<ConstantSDNode>(Op1)) {
15372 OtherUses.clear();
15373 break;
15374 }
15375
15376 // FIXME: In some cases, we can be smarter about this.
15377 if (Op1.getValueType() != Offset.getValueType()) {
15378 OtherUses.clear();
15379 break;
15380 }
15381
15382 OtherUses.push_back(Use.getUser());
15383 }
15384
15385 if (Swapped)
15386 std::swap(BasePtr, Offset);
15387
15388 // Now check for #3 and #4.
15389 bool RealUse = false;
15390
15391 for (SDNode *Use : Ptr.getNode()->uses()) {
15392 if (Use == N)
15393 continue;
15394 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15395 return false;
15396
15397 // If Ptr may be folded in addressing mode of other use, then it's
15398 // not profitable to do this transformation.
15399 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15400 RealUse = true;
15401 }
15402
15403 if (!RealUse)
15404 return false;
15405
15406 SDValue Result;
15407 if (!IsMasked) {
15408 if (IsLoad)
15409 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15410 else
15411 Result =
15412 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15413 } else {
15414 if (IsLoad)
15415 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15416 Offset, AM);
15417 else
15418 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15419 Offset, AM);
15420 }
15421 ++PreIndexedNodes;
15422 ++NodesCombined;
15423 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15424 Result.getNode()->dump(&DAG); dbgs() << '\n');
15425 WorklistRemover DeadNodes(*this);
15426 if (IsLoad) {
15427 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15428 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15429 } else {
15430 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15431 }
15432
15433 // Finally, since the node is now dead, remove it from the graph.
15434 deleteAndRecombine(N);
15435
15436 if (Swapped)
15437 std::swap(BasePtr, Offset);
15438
15439 // Replace other uses of BasePtr that can be updated to use Ptr
15440 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15441 unsigned OffsetIdx = 1;
15442 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15443 OffsetIdx = 0;
15444 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15445 BasePtr.getNode() && "Expected BasePtr operand");
15446
15447 // We need to replace ptr0 in the following expression:
15448 // x0 * offset0 + y0 * ptr0 = t0
15449 // knowing that
15450 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15451 //
15452 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15453 // indexed load/store and the expression that needs to be re-written.
15454 //
15455 // Therefore, we have:
15456 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15457
15458 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15459 const APInt &Offset0 = CN->getAPIntValue();
15460 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15461 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15462 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15463 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15464 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15465
15466 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15467
15468 APInt CNV = Offset0;
15469 if (X0 < 0) CNV = -CNV;
15470 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15471 else CNV = CNV - Offset1;
15472
15473 SDLoc DL(OtherUses[i]);
15474
15475 // We can now generate the new expression.
15476 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15477 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15478
15479 SDValue NewUse = DAG.getNode(Opcode,
15480 DL,
15481 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15482 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15483 deleteAndRecombine(OtherUses[i]);
15484 }
15485
15486 // Replace the uses of Ptr with uses of the updated base value.
15487 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15488 deleteAndRecombine(Ptr.getNode());
15489 AddToWorklist(Result.getNode());
15490
15491 return true;
15492}
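// --- Illustrative sketch (not part of the original source): a pre-indexed
// access folds the pointer update into the memory operation itself.
// Hypothetical C-level shape of what the combine enables on targets with
// pre-increment forms:
static int preIndexSketch(int *&p) {
  // Before: q = p + 4;  v = *q;  p = q;   (an ADD with extra uses feeds a load)
  // After:  v = *(p += 4);                (one pre-indexed load also updates p)
  p += 4;
  return *p;
}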
15493
15494static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15495 SDValue &BasePtr, SDValue &Offset,
15496 ISD::MemIndexedMode &AM,
15497 SelectionDAG &DAG,
15498 const TargetLowering &TLI) {
15499 if (PtrUse == N ||
15500 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15501 return false;
15502
15503 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15504 return false;
15505
15506 // Don't create an indexed load / store with zero offset.
15507 if (isNullConstant(Offset))
15508 return false;
15509
15510 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15511 return false;
15512
15513 SmallPtrSet<const SDNode *, 32> Visited;
15514 for (SDNode *Use : BasePtr.getNode()->uses()) {
15515 if (Use == Ptr.getNode())
15516 continue;
15517
15518 // Don't do it if there's a later user which could perform the index instead.
15519 if (isa<MemSDNode>(Use)) {
15520 bool IsLoad = true;
15521 bool IsMasked = false;
15522 SDValue OtherPtr;
15523 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15524 IsMasked, OtherPtr, TLI)) {
15525 SmallVector<const SDNode *, 2> Worklist;
15526 Worklist.push_back(Use);
15527 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15528 return false;
15529 }
15530 }
15531
15532 // If all the uses are load / store addresses, then don't do the
15533 // transformation.
15534 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15535 for (SDNode *UseUse : Use->uses())
15536 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15537 return false;
15538 }
15539 }
15540 return true;
15541}
15542
15543static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15544 bool &IsMasked, SDValue &Ptr,
15545 SDValue &BasePtr, SDValue &Offset,
15546 ISD::MemIndexedMode &AM,
15547 SelectionDAG &DAG,
15548 const TargetLowering &TLI) {
15549 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15550 IsMasked, Ptr, TLI) ||
15551 Ptr.getNode()->hasOneUse())
15552 return nullptr;
15553
15554 // Try turning it into a post-indexed load / store except when
15555 // 1) All uses are load / store ops that use it as base ptr (and
15556 // it may be folded as addressing mode).
15557 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15558 // nor a successor of N. Otherwise, if Op is folded that would
15559 // create a cycle.
15560 for (SDNode *Op : Ptr->uses()) {
15561 // Check for #1.
15562 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15563 continue;
15564
15565 // Check for #2.
15566 SmallPtrSet<const SDNode *, 32> Visited;
15567 SmallVector<const SDNode *, 8> Worklist;
15568 // Ptr is predecessor to both N and Op.
15569 Visited.insert(Ptr.getNode());
15570 Worklist.push_back(N);
15571 Worklist.push_back(Op);
15572 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15573 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15574 return Op;
15575 }
15576 return nullptr;
15577}
15578
15579 /// Try to combine a load/store with an add/sub of the base pointer node into a
15580 /// post-indexed load/store. The transformation effectively folds the add/subtract
15581 /// into the new indexed load/store, and all of its uses are redirected to the
15582/// new load/store.
15583bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15584 if (Level < AfterLegalizeDAG)
15585 return false;
15586
15587 bool IsLoad = true;
15588 bool IsMasked = false;
15589 SDValue Ptr;
15590 SDValue BasePtr;
15591 SDValue Offset;
15592 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15593 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15594 Offset, AM, DAG, TLI);
15595 if (!Op)
15596 return false;
15597
15598 SDValue Result;
15599 if (!IsMasked)
15600 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15601 Offset, AM)
15602 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15603 BasePtr, Offset, AM);
15604 else
15605 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15606 BasePtr, Offset, AM)
15607 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15608 BasePtr, Offset, AM);
15609 ++PostIndexedNodes;
15610 ++NodesCombined;
15611 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15612 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15613 dbgs() << '\n');
15614 WorklistRemover DeadNodes(*this);
15615 if (IsLoad) {
15616 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15617 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15618 } else {
15619 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15620 }
15621
15622 // Finally, since the node is now dead, remove it from the graph.
15623 deleteAndRecombine(N);
15624
15625 // Replace the uses of Use with uses of the updated base value.
15626 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15627 Result.getValue(IsLoad ? 1 : 0));
15628 deleteAndRecombine(Op);
15629 return true;
15630}
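// --- Illustrative sketch (not part of the original source): the post-indexed
// form performs the access first and writes back the incremented pointer.
// Hypothetical C-level shape:
static int postIndexSketch(int *&p) {
  // Before: v = *p;  p = p + 4;   (a load plus a separate pointer update)
  // After:  a single post-indexed load that yields v and the updated pointer.
  int v = *p;
  p += 4;
  return v;
}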
15631
15632/// Return the base-pointer arithmetic from an indexed \p LD.
15633SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15634 ISD::MemIndexedMode AM = LD->getAddressingMode();
15635 assert(AM != ISD::UNINDEXED);
15636 SDValue BP = LD->getOperand(1);
15637 SDValue Inc = LD->getOperand(2);
15638
15639 // Some backends use TargetConstants for load offsets, but don't expect
15640 // TargetConstants in general ADD nodes. We can convert these constants into
15641 // regular Constants (if the constant is not opaque).
15642 assert((Inc.getOpcode() != ISD::TargetConstant ||
15643 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15644 "Cannot split out indexing using opaque target constants");
15645 if (Inc.getOpcode() == ISD::TargetConstant) {
15646 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15647 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15648 ConstInc->getValueType(0));
15649 }
15650
15651 unsigned Opc =
15652 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15653 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15654}
15655
15656static inline ElementCount numVectorEltsOrZero(EVT T) {
15657 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15658}
15659
15660bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15661 Val = ST->getValue();
15662 EVT STType = Val.getValueType();
15663 EVT STMemType = ST->getMemoryVT();
15664 if (STType == STMemType)
15665 return true;
15666 if (isTypeLegal(STMemType))
15667 return false; // fail.
15668 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15669 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15670 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15671 return true;
15672 }
15673 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15674 STType.isInteger() && STMemType.isInteger()) {
15675 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15676 return true;
15677 }
15678 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15679 Val = DAG.getBitcast(STMemType, Val);
15680 return true;
15681 }
15682 return false; // fail.
15683}
15684
15685bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15686 EVT LDMemType = LD->getMemoryVT();
15687 EVT LDType = LD->getValueType(0);
15688 assert(Val.getValueType() == LDMemType &&((void)0)
15689 "Attempting to extend value of non-matching type")((void)0);
15690 if (LDType == LDMemType)
15691 return true;
15692 if (LDMemType.isInteger() && LDType.isInteger()) {
15693 switch (LD->getExtensionType()) {
15694 case ISD::NON_EXTLOAD:
15695 Val = DAG.getBitcast(LDType, Val);
15696 return true;
15697 case ISD::EXTLOAD:
15698 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15699 return true;
15700 case ISD::SEXTLOAD:
15701 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15702 return true;
15703 case ISD::ZEXTLOAD:
15704 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15705 return true;
15706 }
15707 }
15708 return false;
15709}
15710
15711SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15712 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15713 return SDValue();
15714 SDValue Chain = LD->getOperand(0);
15715 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15716 // TODO: Relax this restriction for unordered atomics (see D66309)
15717 if (!ST || !ST->isSimple())
15718 return SDValue();
15719
15720 EVT LDType = LD->getValueType(0);
15721 EVT LDMemType = LD->getMemoryVT();
15722 EVT STMemType = ST->getMemoryVT();
15723 EVT STType = ST->getValue().getValueType();
15724
15725 // There are two cases to consider here:
15726 // 1. The store is fixed width and the load is scalable. In this case we
15727 // don't know at compile time if the store completely envelops the load
15728 // so we abandon the optimisation.
15729 // 2. The store is scalable and the load is fixed width. We could
15730 // potentially support a limited number of cases here, but there has been
15731 // no cost-benefit analysis to prove it's worth it.
15732 bool LdStScalable = LDMemType.isScalableVector();
15733 if (LdStScalable != STMemType.isScalableVector())
15734 return SDValue();
15735
15736 // If we are dealing with scalable vectors on a big endian platform the
15737 // calculation of offsets below becomes trickier, since we do not know at
15738 // compile time the absolute size of the vector. Until we've done more
15739 // analysis on big-endian platforms it seems better to bail out for now.
15740 if (LdStScalable && DAG.getDataLayout().isBigEndian())
15741 return SDValue();
15742
15743 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15744 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15745 int64_t Offset;
15746 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15747 return SDValue();
15748
15749 // Normalize for Endianness. After this Offset=0 will denote that the least
15750 // significant bit in the loaded value maps to the least significant bit in
15751 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
15752 // n-th least significant byte of the stored value.
15753 if (DAG.getDataLayout().isBigEndian())
15754 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15755 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15756 8 -
15757 Offset;
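For example, with an i32 store and an i8 load on a big-endian target, a raw byte offset of 3 (the last byte of the stored word) normalizes to (32 - 8) / 8 - 3 = 0, i.e. the load reads the least significant byte of the stored value.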
15758
15759 // Check that the stored value covers all bits that are loaded.
15760 bool STCoversLD;
15761
15762 TypeSize LdMemSize = LDMemType.getSizeInBits();
15763 TypeSize StMemSize = STMemType.getSizeInBits();
15764 if (LdStScalable)
15765 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15766 else
15767 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15768 StMemSize.getFixedSize());
15769
15770 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15771 if (LD->isIndexed()) {
15772 // Cannot handle opaque target constants and we must respect the user's
15773 // request not to split indexes from loads.
15774 if (!canSplitIdx(LD))
15775 return SDValue();
15776 SDValue Idx = SplitIndexingFromLoad(LD);
15777 SDValue Ops[] = {Val, Idx, Chain};
15778 return CombineTo(LD, Ops, 3);
15779 }
15780 return CombineTo(LD, Val, Chain);
15781 };
15782
15783 if (!STCoversLD)
15784 return SDValue();
15785
15786 // Memory as copy space (potentially masked).
15787 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15788 // Simple case: Direct non-truncating forwarding
15789 if (LDType.getSizeInBits() == LdMemSize)
15790 return ReplaceLd(LD, ST->getValue(), Chain);
15791 // Can we model the truncate and extension with an and mask?
15792 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15793 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15794 // Mask to size of LDMemType
15795 auto Mask =
15796 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15797 StMemSize.getFixedSize()),
15798 SDLoc(ST), STType);
15799 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15800 return ReplaceLd(LD, Val, Chain);
15801 }
15802 }
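A worked instance of the mask path, assuming such a pair survives the checks above: a truncating store of i32 %x with STMemType i16, followed by a zero- or any-extending i16-to-i32 load of the same address, gives STType == LDType == i32 and STMemType == LDMemType == i16, so the load is replaced by (and %x, 0xFFFF), i.e. APInt::getLowBitsSet(32, 16). Sign-extending loads are excluded because the AND cannot reproduce the sign bits.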
15803
15804 // TODO: Deal with nonzero offset.
15805 if (LD->getBasePtr().isUndef() || Offset != 0)
15806 return SDValue();
15807 // Model necessary truncations / extensions.
15808 SDValue Val;
15809 // Truncate Value To Stored Memory Size.
15810 do {
15811 if (!getTruncatedStoreValue(ST, Val))
15812 continue;
15813 if (!isTypeLegal(LDMemType))
15814 continue;
15815 if (STMemType != LDMemType) {
15816 // TODO: Support vectors? This requires extract_subvector/bitcast.
15817 if (!STMemType.isVector() && !LDMemType.isVector() &&
15818 STMemType.isInteger() && LDMemType.isInteger())
15819 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15820 else
15821 continue;
15822 }
15823 if (!extendLoadedValueToExtension(LD, Val))
15824 continue;
15825 return ReplaceLd(LD, Val, Chain);
15826 } while (false);
15827
15828 // On failure, cleanup dead nodes we may have created.
15829 if (Val->use_empty())
15830 deleteAndRecombine(Val.getNode());
15831 return SDValue();
15832}
15833
15834SDValue DAGCombiner::visitLOAD(SDNode *N) {
15835 LoadSDNode *LD = cast<LoadSDNode>(N);
15836 SDValue Chain = LD->getChain();
15837 SDValue Ptr = LD->getBasePtr();
15838
15839 // If load is not volatile and there are no uses of the loaded value (and
15840 // the updated indexed value in case of indexed loads), change uses of the
15841 // chain value into uses of the chain input (i.e. delete the dead load).
15842 // TODO: Allow this for unordered atomics (see D66309)
15843 if (LD->isSimple()) {
15844 if (N->getValueType(1) == MVT::Other) {
15845 // Unindexed loads.
15846 if (!N->hasAnyUseOfValue(0)) {
15847 // It's not safe to use the two value CombineTo variant here. e.g.
15848 // v1, chain2 = load chain1, loc
15849 // v2, chain3 = load chain2, loc
15850 // v3 = add v2, c
15851 // Now we replace use of chain2 with chain1. This makes the second load
15852 // isomorphic to the one we are deleting, and thus makes this load live.
15853 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);do { } while (false)
15854 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);do { } while (false)
15855 dbgs() << "\n")do { } while (false);
15856 WorklistRemover DeadNodes(*this);
15857 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15858 AddUsersToWorklist(Chain.getNode());
15859 if (N->use_empty())
15860 deleteAndRecombine(N);
15861
15862 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15863 }
15864 } else {
15865 // Indexed loads.
15866 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?")((void)0);
15867
15868 // If this load has an opaque TargetConstant offset, then we cannot split
15869 // the indexing into an add/sub directly (that TargetConstant may not be
15870 // valid for a different type of node, and we cannot convert an opaque
15871 // target constant into a regular constant).
15872 bool CanSplitIdx = canSplitIdx(LD);
15873
15874 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15875 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15876 SDValue Index;
15877 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15878 Index = SplitIndexingFromLoad(LD);
15879 // Try to fold the base pointer arithmetic into subsequent loads and
15880 // stores.
15881 AddUsersToWorklist(N);
15882 } else
15883 Index = DAG.getUNDEF(N->getValueType(1));
15884 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);do { } while (false)
15885 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);do { } while (false)
15886 dbgs() << " and 2 other values\n")do { } while (false);
15887 WorklistRemover DeadNodes(*this);
15888 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15889 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15890 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15891 deleteAndRecombine(N);
15892 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15893 }
15894 }
15895 }
15896
15897 // If this load is directly stored, replace the load value with the stored
15898 // value.
15899 if (auto V = ForwardStoreValueToDirectLoad(LD))
15900 return V;
15901
15902 // Try to infer better alignment information than the load already has.
15903 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15904 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15905 if (*Alignment > LD->getAlign() &&
15906 isAligned(*Alignment, LD->getSrcValueOffset())) {
15907 SDValue NewLoad = DAG.getExtLoad(
15908 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15909 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15910 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15911 // NewLoad will always be N as we are only refining the alignment
15912 assert(NewLoad.getNode() == N)((void)0);
15913 (void)NewLoad;
15914 }
15915 }
15916 }
15917
15918 if (LD->isUnindexed()) {
15919 // Walk up chain skipping non-aliasing memory nodes.
15920 SDValue BetterChain = FindBetterChain(LD, Chain);
15921
15922 // If there is a better chain.
15923 if (Chain != BetterChain) {
15924 SDValue ReplLoad;
15925
15926 // Replace the chain to avoid dependency.
15927 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15928 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15929 BetterChain, Ptr, LD->getMemOperand());
15930 } else {
15931 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15932 LD->getValueType(0),
15933 BetterChain, Ptr, LD->getMemoryVT(),
15934 LD->getMemOperand());
15935 }
15936
15937 // Create token factor to keep old chain connected.
15938 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15939 MVT::Other, Chain, ReplLoad.getValue(1));
15940
15941 // Replace uses with load result and token factor
15942 return CombineTo(N, ReplLoad.getValue(0), Token);
15943 }
15944 }
15945
15946 // Try transforming N to an indexed load.
15947 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15948 return SDValue(N, 0);
15949
15950 // Try to slice up N to more direct loads if the slices are mapped to
15951 // different register banks or pairing can take place.
15952 if (SliceUpLoad(N))
15953 return SDValue(N, 0);
15954
15955 return SDValue();
15956}
15957
15958namespace {
15959
15960/// Helper structure used to slice a load in smaller loads.
15961/// Basically a slice is obtained from the following sequence:
15962/// Origin = load Ty1, Base
15963/// Shift = srl Ty1 Origin, CstTy Amount
15964/// Inst = trunc Shift to Ty2
15965///
15966/// Then, it will be rewritten into:
15967/// Slice = load SliceTy, Base + SliceOffset
15968/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15969///
15970/// SliceTy is deduced from the number of bits that are actually used to
15971/// build Inst.
15972struct LoadedSlice {
15973 /// Helper structure used to compute the cost of a slice.
15974 struct Cost {
15975 /// Are we optimizing for code size.
15976 bool ForCodeSize = false;
15977
15978 /// Various costs.
15979 unsigned Loads = 0;
15980 unsigned Truncates = 0;
15981 unsigned CrossRegisterBanksCopies = 0;
15982 unsigned ZExts = 0;
15983 unsigned Shift = 0;
15984
15985 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15986
15987 /// Get the cost of one isolated slice.
15988 Cost(const LoadedSlice &LS, bool ForCodeSize)
15989 : ForCodeSize(ForCodeSize), Loads(1) {
15990 EVT TruncType = LS.Inst->getValueType(0);
15991 EVT LoadedType = LS.getLoadedType();
15992 if (TruncType != LoadedType &&
15993 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15994 ZExts = 1;
15995 }
15996
15997 /// Account for slicing gain in the current cost.
15998 /// Slicing provides a few gains, such as removing a shift or a
15999 /// truncate. This method grows the cost of the original
16000 /// load by the gain from this slice.
16001 void addSliceGain(const LoadedSlice &LS) {
16002 // Each slice saves a truncate.
16003 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16004 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16005 LS.Inst->getValueType(0)))
16006 ++Truncates;
16007 // If there is a shift amount, this slice gets rid of it.
16008 if (LS.Shift)
16009 ++Shift;
16010 // If this slice can merge a cross register bank copy, account for it.
16011 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16012 ++CrossRegisterBanksCopies;
16013 }
16014
16015 Cost &operator+=(const Cost &RHS) {
16016 Loads += RHS.Loads;
16017 Truncates += RHS.Truncates;
16018 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16019 ZExts += RHS.ZExts;
16020 Shift += RHS.Shift;
16021 return *this;
16022 }
16023
16024 bool operator==(const Cost &RHS) const {
16025 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16026 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16027 ZExts == RHS.ZExts && Shift == RHS.Shift;
16028 }
16029
16030 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16031
16032 bool operator<(const Cost &RHS) const {
16033 // Assume cross register banks copies are as expensive as loads.
16034 // FIXME: Do we want some more target hooks?
16035 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16036 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16037 // Unless we are optimizing for code size, consider the
16038 // expensive operation first.
16039 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16040 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16041 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16042 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16043 }
16044
16045 bool operator>(const Cost &RHS) const { return RHS < *this; }
16046
16047 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16048
16049 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16050 };
16051
16052 // The last instruction that represents the slice. This should be a
16053 // truncate instruction.
16054 SDNode *Inst;
16055
16056 // The original load instruction.
16057 LoadSDNode *Origin;
16058
16059 // The right shift amount in bits from the original load.
16060 unsigned Shift;
16061
16062 // The DAG from which Origin came.
16063 // This is used to get some contextual information about legal types, etc.
16064 SelectionDAG *DAG;
16065
16066 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16067 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16068 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16069
16070 /// Get the bits used in a chunk of bits \p BitWidth large.
16071 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16072 /// unused bits set to 0.
16073 APInt getUsedBits() const {
16074 // Reproduce the trunc(lshr) sequence:
16075 // - Start from the truncated value.
16076 // - Zero extend to the desired bit width.
16077 // - Shift left.
16078 assert(Origin && "No original load to compare against.")((void)0);
16079 unsigned BitWidth = Origin->getValueSizeInBits(0);
16080 assert(Inst && "This slice is not bound to an instruction")((void)0);
16081 assert(Inst->getValueSizeInBits(0) <= BitWidth &&((void)0)
16082 "Extracted slice is bigger than the whole type!")((void)0);
16083 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16084 UsedBits.setAllBits();
16085 UsedBits = UsedBits.zext(BitWidth);
16086 UsedBits <<= Shift;
16087 return UsedBits;
16088 }
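Worked example: for an i32 Origin whose slice is (trunc (srl Origin, 16) to i8), UsedBits starts as the 8-bit all-ones value, is zero-extended to 32 bits (0x000000FF), and is then shifted left by 16, giving 0x00FF0000.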
16089
16090 /// Get the size of the slice to be loaded in bytes.
16091 unsigned getLoadedSize() const {
16092 unsigned SliceSize = getUsedBits().countPopulation();
16093 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.")((void)0);
16094 return SliceSize / 8;
16095 }
16096
16097 /// Get the type that will be loaded for this slice.
16098 /// Note: This may not be the final type for the slice.
16099 EVT getLoadedType() const {
16100 assert(DAG && "Missing context")((void)0);
16101 LLVMContext &Ctxt = *DAG->getContext();
16102 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16103 }
16104
16105 /// Get the alignment of the load used for this slice.
16106 Align getAlign() const {
16107 Align Alignment = Origin->getAlign();
16108 uint64_t Offset = getOffsetFromBase();
16109 if (Offset != 0)
16110 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16111 return Alignment;
16112 }
16113
16114 /// Check if this slice can be rewritten with legal operations.
16115 bool isLegal() const {
16116 // An invalid slice is not legal.
16117 if (!Origin || !Inst || !DAG)
16118 return false;
16119
16120 // Offsets are only for indexed loads; we do not handle that.
16121 if (!Origin->getOffset().isUndef())
16122 return false;
16123
16124 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16125
16126 // Check that the type is legal.
16127 EVT SliceType = getLoadedType();
16128 if (!TLI.isTypeLegal(SliceType))
16129 return false;
16130
16131 // Check that the load is legal for this type.
16132 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16133 return false;
16134
16135 // Check that the offset can be computed.
16136 // 1. Check its type.
16137 EVT PtrType = Origin->getBasePtr().getValueType();
16138 if (PtrType == MVT::Untyped || PtrType.isExtended())
16139 return false;
16140
16141 // 2. Check that it fits in the immediate.
16142 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16143 return false;
16144
16145 // 3. Check that the computation is legal.
16146 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16147 return false;
16148
16149 // Check that the zext is legal if it needs one.
16150 EVT TruncateType = Inst->getValueType(0);
16151 if (TruncateType != SliceType &&
16152 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16153 return false;
16154
16155 return true;
16156 }
16157
16158 /// Get the offset in bytes of this slice in the original chunk of
16159 /// bits.
16160 /// \pre DAG != nullptr.
16161 uint64_t getOffsetFromBase() const {
16162 assert(DAG && "Missing context.")((void)0);
16163 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16164 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.")((void)0);
16165 uint64_t Offset = Shift / 8;
16166 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16167 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&((void)0)
16168 "The size of the original loaded type is not a multiple of a"((void)0)
16169 " byte.")((void)0);
16170 // If Offset is bigger than TySizeInBytes, it means we are loading all
16171 // zeros. This should have been optimized before in the process.
16172 assert(TySizeInBytes > Offset &&((void)0)
16173 "Invalid shift amount for given loaded size")((void)0);
16174 if (IsBigEndian)
16175 Offset = TySizeInBytes - Offset - getLoadedSize();
16176 return Offset;
16177 }
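Continuing the example above (an i32 Origin, Shift = 16, a one-byte slice): Offset = 16 / 8 = 2 on a little-endian target, and 4 - 2 - 1 = 1 on a big-endian target.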
16178
16179 /// Generate the sequence of instructions to load the slice
16180 /// represented by this object and redirect the uses of this slice to
16181 /// this new sequence of instructions.
16182 /// \pre this->Inst && this->Origin are valid Instructions and this
16183 /// object passed the legal check: LoadedSlice::isLegal returned true.
16184 /// \return The last instruction of the sequence used to load the slice.
16185 SDValue loadSlice() const {
16186 assert(Inst && Origin && "Unable to replace a non-existing slice.")((void)0);
16187 const SDValue &OldBaseAddr = Origin->getBasePtr();
16188 SDValue BaseAddr = OldBaseAddr;
16189 // Get the offset in that chunk of bytes w.r.t. the endianness.
16190 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16191 assert(Offset >= 0 && "Offset too big to fit in int64_t!")((void)0);
16192 if (Offset) {
16193 // BaseAddr = BaseAddr + Offset.
16194 EVT ArithType = BaseAddr.getValueType();
16195 SDLoc DL(Origin);
16196 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16197 DAG->getConstant(Offset, DL, ArithType));
16198 }
16199
16200 // Create the type of the loaded slice according to its size.
16201 EVT SliceType = getLoadedType();
16202
16203 // Create the load for the slice.
16204 SDValue LastInst =
16205 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16206 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16207 Origin->getMemOperand()->getFlags());
16208 // If the final type is not the same as the loaded type, this means that
16209 // we have to pad with zero. Create a zero extend for that.
16210 EVT FinalType = Inst->getValueType(0);
16211 if (SliceType != FinalType)
16212 LastInst =
16213 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16214 return LastInst;
16215 }
16216
16217 /// Check if this slice can be merged with an expensive cross register
16218 /// bank copy. E.g.,
16219 /// i = load i32
16220 /// f = bitcast i32 i to float
16221 bool canMergeExpensiveCrossRegisterBankCopy() const {
16222 if (!Inst || !Inst->hasOneUse())
16223 return false;
16224 SDNode *Use = *Inst->use_begin();
16225 if (Use->getOpcode() != ISD::BITCAST)
16226 return false;
16227 assert(DAG && "Missing context")((void)0);
16228 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16229 EVT ResVT = Use->getValueType(0);
16230 const TargetRegisterClass *ResRC =
16231 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16232 const TargetRegisterClass *ArgRC =
16233 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16234 Use->getOperand(0)->isDivergent());
16235 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16236 return false;
16237
16238 // At this point, we know that we perform a cross-register-bank copy.
16239 // Check if it is expensive.
16240 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16241 // Assume bitcasts are cheap, unless both register classes do not
16242 // explicitly share a common sub class.
16243 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16244 return false;
16245
16246 // Check if it will be merged with the load.
16247 // 1. Check the alignment constraint.
16248 Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
16249 ResVT.getTypeForEVT(*DAG->getContext()));
16250
16251 if (RequiredAlignment > getAlign())
16252 return false;
16253
16254 // 2. Check that the load is a legal operation for that type.
16255 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16256 return false;
16257
16258 // 3. Check that we do not have a zext in the way.
16259 if (Inst->getValueType(0) != getLoadedType())
16260 return false;
16261
16262 return true;
16263 }
16264};
16265
16266} // end anonymous namespace
16267
16268/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16269/// \p UsedBits looks like 0..0 1..1 0..0.
16270static bool areUsedBitsDense(const APInt &UsedBits) {
16271 // If all the bits are one, this is dense!
16272 if (UsedBits.isAllOnesValue())
16273 return true;
16274
16275 // Get rid of the unused bits on the right.
16276 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16277 // Get rid of the unused bits on the left.
16278 if (NarrowedUsedBits.countLeadingZeros())
16279 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16280 // Check that the chunk of bits is completely used.
16281 return NarrowedUsedBits.isAllOnesValue();
16282}
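For instance, 0x00FF0000 is dense: shifting out the 16 trailing zeros gives 0xFF, and truncating away the leading zeros leaves an all-ones 8-bit value. 0x00FF00FF is not dense: it has no trailing zeros, and after truncating to its 24 active bits the middle byte is still zero.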
16283
16284/// Check whether or not \p First and \p Second are next to each other
16285/// in memory. This means that there is no hole between the bits loaded
16286/// by \p First and the bits loaded by \p Second.
16287static bool areSlicesNextToEachOther(const LoadedSlice &First,
16288 const LoadedSlice &Second) {
16289 assert(First.Origin == Second.Origin && First.Origin &&((void)0)
16290 "Unable to match different memory origins.")((void)0);
16291 APInt UsedBits = First.getUsedBits();
16292 assert((UsedBits & Second.getUsedBits()) == 0 &&((void)0)
16293 "Slices are not supposed to overlap.")((void)0);
16294 UsedBits |= Second.getUsedBits();
16295 return areUsedBitsDense(UsedBits);
16296}
16297
16298/// Adjust the \p GlobalLSCost according to the target
16299/// pairing capabilities and the layout of the slices.
16300/// \pre \p GlobalLSCost should account for at least as many loads as
16301/// there are in the slices in \p LoadedSlices.
16302static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16303 LoadedSlice::Cost &GlobalLSCost) {
16304 unsigned NumberOfSlices = LoadedSlices.size();
16305 // If there are fewer than 2 elements, no pairing is possible.
16306 if (NumberOfSlices < 2)
16307 return;
16308
16309 // Sort the slices so that elements that are likely to be next to each
16310 // other in memory are next to each other in the list.
16311 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16312 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.")((void)0);
16313 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16314 });
16315 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16316 // First (resp. Second) is the first (resp. second) potential candidate
16317 // to be placed in a paired load.
16318 const LoadedSlice *First = nullptr;
16319 const LoadedSlice *Second = nullptr;
16320 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16321 // Set the beginning of the pair.
16322 First = Second) {
16323 Second = &LoadedSlices[CurrSlice];
16324
16325 // If First is NULL, it means we start a new pair.
16326 // Get to the next slice.
16327 if (!First)
16328 continue;
16329
16330 EVT LoadedType = First->getLoadedType();
16331
16332 // If the types of the slices are different, we cannot pair them.
16333 if (LoadedType != Second->getLoadedType())
16334 continue;
16335
16336 // Check if the target supplies paired loads for this type.
16337 Align RequiredAlignment;
16338 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16339 // Move to the next pair; this type is hopeless.
16340 Second = nullptr;
16341 continue;
16342 }
16343 // Check if we meet the alignment requirement.
16344 if (First->getAlign() < RequiredAlignment)
16345 continue;
16346
16347 // Check that both loads are next to each other in memory.
16348 if (!areSlicesNextToEachOther(*First, *Second))
16349 continue;
16350
16351 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!")((void)0);
16352 --GlobalLSCost.Loads;
16353 // Move to the next pair.
16354 Second = nullptr;
16355 }
16356}
16357
16358/// Check the profitability of all involved LoadedSlices.
16359/// Currently, it is considered profitable if there are exactly two
16360/// involved slices (1) which are (2) next to each other in memory, and
16361/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16362///
16363/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16364/// the elements themselves.
16365///
16366/// FIXME: When the cost model is mature enough, we can relax
16367/// constraints (1) and (2).
16368static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16369 const APInt &UsedBits, bool ForCodeSize) {
16370 unsigned NumberOfSlices = LoadedSlices.size();
16371 if (StressLoadSlicing)
16372 return NumberOfSlices > 1;
16373
16374 // Check (1).
16375 if (NumberOfSlices != 2)
16376 return false;
16377
16378 // Check (2).
16379 if (!areUsedBitsDense(UsedBits))
16380 return false;
16381
16382 // Check (3).
16383 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16384 // The original code has one big load.
16385 OrigCost.Loads = 1;
16386 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16387 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16388 // Accumulate the cost of all the slices.
16389 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16390 GlobalSlicingCost += SliceCost;
16391
16392 // Account as cost in the original configuration the gain obtained
16393 // with the current slices.
16394 OrigCost.addSliceGain(LS);
16395 }
16396
16397 // If the target supports paired load, adjust the cost accordingly.
16398 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16399 return OrigCost > GlobalSlicingCost;
16400}
16401
16402/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16403/// operations, split it in the various pieces being extracted.
16404///
16405/// This sort of thing is introduced by SROA.
16406/// This slicing takes care not to insert overlapping loads.
16407/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16408bool DAGCombiner::SliceUpLoad(SDNode *N) {
16409 if (Level < AfterLegalizeDAG)
16410 return false;
16411
16412 LoadSDNode *LD = cast<LoadSDNode>(N);
16413 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16414 !LD->getValueType(0).isInteger())
16415 return false;
16416
16417 // The algorithm to split up a load of a scalable vector into individual
16418 // elements currently requires knowing the length of the loaded type,
16419 // so will need adjusting to work on scalable vectors.
16420 if (LD->getValueType(0).isScalableVector())
16421 return false;
16422
16423 // Keep track of already used bits to detect overlapping values.
16424 // In that case, we will just abort the transformation.
16425 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16426
16427 SmallVector<LoadedSlice, 4> LoadedSlices;
16428
16429 // Check if this load is used as several smaller chunks of bits.
16430 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16431 // of computation for each trunc.
16432 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16433 UI != UIEnd; ++UI) {
16434 // Skip the uses of the chain.
16435 if (UI.getUse().getResNo() != 0)
16436 continue;
16437
16438 SDNode *User = *UI;
16439 unsigned Shift = 0;
16440
16441 // Check if this is a trunc(lshr).
16442 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16443 isa<ConstantSDNode>(User->getOperand(1))) {
16444 Shift = User->getConstantOperandVal(1);
16445 User = *User->use_begin();
16446 }
16447
16448 // At this point, User is a truncate iff we encountered trunc or
16449 // trunc(lshr).
16450 if (User->getOpcode() != ISD::TRUNCATE)
16451 return false;
16452
16453 // The width of the type must be a power of 2 and at least 8 bits.
16454 // Otherwise the load cannot be represented in LLVM IR.
16455 // Moreover, if we shifted by an amount that is not a multiple of 8 bits, the
16456 // slice would span parts of several bytes. We do not support that.
16457 unsigned Width = User->getValueSizeInBits(0);
16458 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16459 return false;
16460
16461 // Build the slice for this chain of computations.
16462 LoadedSlice LS(User, LD, Shift, &DAG);
16463 APInt CurrentUsedBits = LS.getUsedBits();
16464
16465 // Check if this slice overlaps with another.
16466 if ((CurrentUsedBits & UsedBits) != 0)
16467 return false;
16468 // Update the bits used globally.
16469 UsedBits |= CurrentUsedBits;
16470
16471 // Check if the new slice would be legal.
16472 if (!LS.isLegal())
16473 return false;
16474
16475 // Record the slice.
16476 LoadedSlices.push_back(LS);
16477 }
16478
16479 // Abort slicing if it does not seem to be profitable.
16480 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16481 return false;
16482
16483 ++SlicedLoads;
16484
16485 // Rewrite each chain to use an independent load.
16486 // By construction, each chain can be represented by a unique load.
16487
16488 // Prepare the argument for the new token factor for all the slices.
16489 SmallVector<SDValue, 8> ArgChains;
16490 for (const LoadedSlice &LS : LoadedSlices) {
16491 SDValue SliceInst = LS.loadSlice();
16492 CombineTo(LS.Inst, SliceInst, true);
16493 if (SliceInst.getOpcode() != ISD::LOAD)
16494 SliceInst = SliceInst.getOperand(0);
16495 assert(SliceInst->getOpcode() == ISD::LOAD &&((void)0)
16496 "It takes more than a zext to get to the loaded slice!!")((void)0);
16497 ArgChains.push_back(SliceInst.getValue(1));
16498 }
16499
16500 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16501 ArgChains);
16502 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16503 AddToWorklist(Chain.getNode());
16504 return true;
16505}
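As a sketch of the overall rewrite, assuming i16 loads are legal and the cost model accepts the split: an i32 load whose only value uses are (trunc Origin to i16) and (trunc (srl Origin, 16) to i16) becomes two i16 loads at Base and Base + 2 (on little endian), and the TokenFactor built above replaces the original load's chain result.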
16506
16507/// Check to see if V is (and (load ptr), imm), where the load has
16508/// specific bytes cleared out. If so, return the byte size being masked out
16509/// and the shift amount.
16510static std::pair<unsigned, unsigned>
16511CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16512 std::pair<unsigned, unsigned> Result(0, 0);
16513
16514 // Check for the structure we're looking for.
16515 if (V->getOpcode() != ISD::AND ||
16516 !isa<ConstantSDNode>(V->getOperand(1)) ||
16517 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16518 return Result;
16519
16520 // Check the chain and pointer.
16521 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16522 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16523
16524 // This only handles simple types.
16525 if (V.getValueType() != MVT::i16 &&
16526 V.getValueType() != MVT::i32 &&
16527 V.getValueType() != MVT::i64)
16528 return Result;
16529
16530 // Check the constant mask. Invert it so that the bits being masked out are
16531 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16532 // follow the sign bit for uniformity.
16533 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16534 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16535 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16536 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16537 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16538 if (NotMaskLZ == 64) return Result; // All zero mask.
16539
16540 // See if we have a continuous run of bits. If so, we have 0*1+0*
16541 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16542 return Result;
16543
16544 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16545 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16546 NotMaskLZ -= 64-V.getValueSizeInBits();
16547
16548 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16549 switch (MaskedBytes) {
16550 case 1:
16551 case 2:
16552 case 4: break;
16553 default: return Result; // All one mask, or 5-byte mask.
16554 }
16555
16556 // Verify that the masked region starts at a byte offset that is a multiple of
16557 // its width so that the access is aligned the same as the access width.
16558 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16559
16560 // For narrowing to be valid, it must be the case that the load is the
16561 // memory operation immediately preceding the store.
16562 if (LD == Chain.getNode())
16563 ; // ok.
16564 else if (Chain->getOpcode() == ISD::TokenFactor &&
16565 SDValue(LD, 1).hasOneUse()) {
16566 // LD has only 1 chain use so there are no indirect dependencies.
16567 if (!LD->isOperandOf(Chain.getNode()))
16568 return Result;
16569 } else
16570 return Result; // Fail.
16571
16572 Result.first = MaskedBytes;
16573 Result.second = NotMaskTZ/8;
16574 return Result;
16575}
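Worked example: V = (and (load i32 P), 0xFFFF00FF). getSExtValue() sign-extends the mask to 0xFFFFFFFFFFFF00FF, so NotMask = 0x000000000000FF00, with 48 leading zeros, 8 trailing zeros, and a single run of 8 ones. After adjusting NotMaskLZ by 64 - 32 = 32, MaskedBytes = (32 - 16 - 8) / 8 = 1 and NotMaskTZ / 8 = 1, so (assuming the load immediately precedes the store on the chain) the result is {1, 1}: one byte masked out, at byte offset 1.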
16576
16577/// Check to see if IVal is something that provides a value as specified by
16578/// MaskInfo. If so, replace the specified store with a narrower store of
16579/// truncated IVal.
16580static SDValue
16581ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16582 SDValue IVal, StoreSDNode *St,
16583 DAGCombiner *DC) {
16584 unsigned NumBytes = MaskInfo.first;
16585 unsigned ByteShift = MaskInfo.second;
16586 SelectionDAG &DAG = DC->getDAG();
16587
16588 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16589 // that uses this. If not, this is not a replacement.
16590 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16591 ByteShift*8, (ByteShift+NumBytes)*8);
16592 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
1. Assuming the condition is false
2. Taking false branch
16593
16594 // Check that it is legal on the target to do this. It is legal if the new
16595 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16596 // legalization (and the target doesn't explicitly think this is a bad idea).
16597 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16598 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16599 if (!DC->isTypeLegal(VT))
3. Taking false branch
16600 return SDValue();
16601 if (St->getMemOperand() &&
4. Assuming pointer value is null
5. Taking false branch
16602 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16603 *St->getMemOperand()))
16604 return SDValue();
16605
16606 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16607 // shifted by ByteShift and truncated down to NumBytes.
16608 if (ByteShift) {
6. Assuming 'ByteShift' is 0
7. Taking false branch
16609 SDLoc DL(IVal);
16610 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16611 DAG.getConstant(ByteShift*8, DL,
16612 DC->getShiftAmountTy(IVal.getValueType())));
16613 }
16614
16615 // Figure out the offset for the store and the alignment of the access.
16616 unsigned StOffset;
16617 if (DAG.getDataLayout().isLittleEndian())
8. Taking true branch
16618 StOffset = ByteShift;
16619 else
16620 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16621
16622 SDValue Ptr = St->getBasePtr();
16623   if (StOffset) {
8.1 'StOffset' is 0
9. Taking false branch
16624 SDLoc DL(IVal);
16625 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16626 }
16627
16628 // Truncate down to the new size.
16629 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16630
16631 ++OpsNarrowed;
16632 return DAG
16633 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16634 St->getPointerInfo().getWithOffset(StOffset),
10. Calling 'MemSDNode::getPointerInfo'
16635 St->getOriginalAlign());
16636}
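Note: on the path shown, St->getMemOperand() was assumed to be null at step 4, and the St->getPointerInfo() call at step 10 goes through MemSDNode::getPointerInfo(), which reads that same memory operand; this is presumably where the analyzer's path terminates.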
16637
16638/// Look for a sequence of load / op / store where op is one of 'or', 'xor', or
16639/// 'and' of immediates. If 'op' only touches some of the loaded bits, try
16640/// narrowing the load and store if it would end up being a win for performance
16641/// or code size.
16642SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16643 StoreSDNode *ST = cast<StoreSDNode>(N);
16644 if (!ST->isSimple())
16645 return SDValue();
16646
16647 SDValue Chain = ST->getChain();
16648 SDValue Value = ST->getValue();
16649 SDValue Ptr = ST->getBasePtr();
16650 EVT VT = Value.getValueType();
16651
16652 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16653 return SDValue();
16654
16655 unsigned Opc = Value.getOpcode();
16656
16657 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16658 // is a byte mask indicating a consecutive number of bytes, check to see if
16659 // Y is known to provide just those bytes. If so, we try to replace the
16660 // load + or + store sequence with a single (narrower) store, which makes
16661 // the load dead.
16662 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16663 std::pair<unsigned, unsigned> MaskedLoad;
16664 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16665 if (MaskedLoad.first)
16666 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16667 Value.getOperand(1), ST,this))
16668 return NewST;
16669
16670 // Or is commutative, so try swapping X and Y.
16671 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16672 if (MaskedLoad.first)
16673 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16674 Value.getOperand(0), ST,this))
16675 return NewST;
16676 }
16677
16678 if (!EnableReduceLoadOpStoreWidth)
16679 return SDValue();
16680
16681 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16682 Value.getOperand(1).getOpcode() != ISD::Constant)
16683 return SDValue();
16684
16685 SDValue N0 = Value.getOperand(0);
16686 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16687 Chain == SDValue(N0.getNode(), 1)) {
16688 LoadSDNode *LD = cast<LoadSDNode>(N0);
16689 if (LD->getBasePtr() != Ptr ||
16690 LD->getPointerInfo().getAddrSpace() !=
16691 ST->getPointerInfo().getAddrSpace())
16692 return SDValue();
16693
16694 // Find the type to narrow it the load / op / store to.
16695 SDValue N1 = Value.getOperand(1);
16696 unsigned BitWidth = N1.getValueSizeInBits();
16697 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16698 if (Opc == ISD::AND)
16699 Imm ^= APInt::getAllOnesValue(BitWidth);
16700 if (Imm == 0 || Imm.isAllOnesValue())
16701 return SDValue();
16702 unsigned ShAmt = Imm.countTrailingZeros();
16703 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16704 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16705 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16706 // The narrowing should be profitable, the load/store operation should be
16707 // legal (or custom) and the store size should be equal to the NewVT width.
16708 while (NewBW < BitWidth &&
16709 (NewVT.getStoreSizeInBits() != NewBW ||
16710 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16711 !TLI.isNarrowingProfitable(VT, NewVT))) {
16712 NewBW = NextPowerOf2(NewBW);
16713 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16714 }
16715 if (NewBW >= BitWidth)
16716 return SDValue();
16717
16718 // If the changed lsb does not start at a boundary of the new type width,
16719 // start at the previous one.
16720 if (ShAmt % NewBW)
16721 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16722 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16723 std::min(BitWidth, ShAmt + NewBW));
16724 if ((Imm & Mask) == Imm) {
16725 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16726 if (Opc == ISD::AND)
16727 NewImm ^= APInt::getAllOnesValue(NewBW);
16728 uint64_t PtrOff = ShAmt / 8;
16729 // For big endian targets, we need to adjust the offset to the pointer to
16730 // load the correct bytes.
16731 if (DAG.getDataLayout().isBigEndian())
16732 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16733
16734 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16735 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16736 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16737 return SDValue();
16738
16739 SDValue NewPtr =
16740 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16741 SDValue NewLD =
16742 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16743 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16744 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16745 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16746 DAG.getConstant(NewImm, SDLoc(Value),
16747 NewVT));
16748 SDValue NewST =
16749 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16750 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16751
16752 AddToWorklist(NewPtr.getNode());
16753 AddToWorklist(NewLD.getNode());
16754 AddToWorklist(NewVal.getNode());
16755 WorklistRemover DeadNodes(*this);
16756 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16757 ++OpsNarrowed;
16758 return NewST;
16759 }
16760 }
16761
16762 return SDValue();
16763}
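A worked instance of the narrowing arithmetic, assuming i8 is legal and the target reports the narrowing as profitable: for store (or (load P), 0x0000FF00), P on a little-endian target, ShAmt = 8 and MSB = 15, so NewBW = NextPowerOf2(7) = 8; Mask = 0xFF00 covers Imm, NewImm = 0xFF, and PtrOff = 1, so the sequence becomes an i8 load of P + 1, an 'or' with 0xFF, and an i8 store back to P + 1.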
16764
16765/// For a given floating point load / store pair, if the load value isn't used
16766/// by any other operations, then consider transforming the pair to integer
16767/// load / store operations if the target deems the transformation profitable.
16768SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16769 StoreSDNode *ST = cast<StoreSDNode>(N);
16770 SDValue Value = ST->getValue();
16771 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16772 Value.hasOneUse()) {
16773 LoadSDNode *LD = cast<LoadSDNode>(Value);
16774 EVT VT = LD->getMemoryVT();
16775 if (!VT.isFloatingPoint() ||
16776 VT != ST->getMemoryVT() ||
16777 LD->isNonTemporal() ||
16778 ST->isNonTemporal() ||
16779 LD->getPointerInfo().getAddrSpace() != 0 ||
16780 ST->getPointerInfo().getAddrSpace() != 0)
16781 return SDValue();
16782
16783 TypeSize VTSize = VT.getSizeInBits();
16784
16785 // We don't know the size of scalable types at compile time so we cannot
16786 // create an integer of the equivalent size.
16787 if (VTSize.isScalable())
16788 return SDValue();
16789
16790 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16791 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16792 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16793 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16794 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16795 return SDValue();
16796
16797 Align LDAlign = LD->getAlign();
16798 Align STAlign = ST->getAlign();
16799 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16800 Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16801 if (LDAlign < ABIAlign || STAlign < ABIAlign)
16802 return SDValue();
16803
16804 SDValue NewLD =
16805 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16806 LD->getPointerInfo(), LDAlign);
16807
16808 SDValue NewST =
16809 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16810 ST->getPointerInfo(), STAlign);
16811
16812 AddToWorklist(NewLD.getNode());
16813 AddToWorklist(NewST.getNode());
16814 WorklistRemover DeadNodes(*this);
16815 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16816 ++LdStFP2Int;
16817 return NewST;
16818 }
16819
16820 return SDValue();
16821}
16822
16823// This is a helper function for visitMUL to check the profitability
16824// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16825// MulNode is the original multiply, AddNode is (add x, c1),
16826// and ConstNode is c2.
16827//
16828// If the (add x, c1) has multiple uses, we could increase
16829// the number of adds if we make this transformation.
16830// It would only be worth doing this if we can remove a
16831// multiply in the process. Check for that here.
16832// To illustrate:
16833// (A + c1) * c3
16834// (A + c2) * c3
16835// We're checking for cases where we have common "c3 * A" expressions.
16836bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16837 SDValue &AddNode,
16838 SDValue &ConstNode) {
16839 APInt Val;
16840
16841 // If the add only has one use, this would be OK to do.
16842 if (AddNode.getNode()->hasOneUse())
16843 return true;
16844
16845 // Walk all the users of the constant with which we're multiplying.
16846 for (SDNode *Use : ConstNode->uses()) {
16847 if (Use == MulNode) // This use is the one we're on right now. Skip it.
16848 continue;
16849
16850 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16851 SDNode *OtherOp;
16852 SDNode *MulVar = AddNode.getOperand(0).getNode();
16853
16854 // OtherOp is what we're multiplying against the constant.
16855 if (Use->getOperand(0) == ConstNode)
16856 OtherOp = Use->getOperand(1).getNode();
16857 else
16858 OtherOp = Use->getOperand(0).getNode();
16859
16860 // Check to see if multiply is with the same operand of our "add".
16861 //
16862 // ConstNode = CONST
16863 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
16864 // ...
16865 // AddNode = (A + c1) <-- MulVar is A.
16866 // = AddNode * ConstNode <-- current visiting instruction.
16867 //
16868 // If we make this transformation, we will have a common
16869 // multiply (ConstNode * A) that we can save.
16870 if (OtherOp == MulVar)
16871 return true;
16872
16873 // Now check to see if a future expansion will give us a common
16874 // multiply.
16875 //
16876 // ConstNode = CONST
16877 // AddNode = (A + c1)
16878 // ... = AddNode * ConstNode <-- current visiting instruction.
16879 // ...
16880 // OtherOp = (A + c2)
16881 // Use = OtherOp * ConstNode <-- visiting Use.
16882 //
16883 // If we make this transformation, we will have a common
16884 // multiply (CONST * A) after we also do the same transformation
16885 // to the "t2" instruction.
16886 if (OtherOp->getOpcode() == ISD::ADD &&
16887 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16888 OtherOp->getOperand(0).getNode() == MulVar)
16889 return true;
16890 }
16891 }
16892
16893 // Didn't find a case where this would be profitable.
16894 return false;
16895}
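For example, with AddNode = (A + 1), ConstNode = 5, and another user (A + 2) * 5 of the same constant, rewriting both multiplies yields (A * 5) + 5 and (A * 5) + 10, exposing a common (A * 5), so the function reports the fold as profitable.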
16896
16897SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16898 unsigned NumStores) {
16899 SmallVector<SDValue, 8> Chains;
16900 SmallPtrSet<const SDNode *, 8> Visited;
16901 SDLoc StoreDL(StoreNodes[0].MemNode);
16902
16903 for (unsigned i = 0; i < NumStores; ++i) {
16904 Visited.insert(StoreNodes[i].MemNode);
16905 }
16906
16907 // Don't include nodes that are children or repeated nodes.
16908 for (unsigned i = 0; i < NumStores; ++i) {
16909 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16910 Chains.push_back(StoreNodes[i].MemNode->getChain());
16911 }
16912
16913 assert(Chains.size() > 0 && "Chain should have generated a chain")((void)0);
16914 return DAG.getTokenFactor(StoreDL, Chains);
16915}
16916
16917bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16918 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16919 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16920 // Make sure we have something to merge.
16921 if (NumStores < 2)
16922 return false;
16923
16924 assert((!UseTrunc || !UseVector) &&((void)0)
16925 "This optimization cannot emit a vector truncating store")((void)0);
16926
16927 // The latest Node in the DAG.
16928 SDLoc DL(StoreNodes[0].MemNode);
16929
16930 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16931 unsigned SizeInBits = NumStores * ElementSizeBits;
16932 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16933
16934 EVT StoreTy;
16935 if (UseVector) {
16936 unsigned Elts = NumStores * NumMemElts;
16937 // Get the type for the merged vector store.
16938 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16939 } else
16940 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16941
16942 SDValue StoredVal;
16943 if (UseVector) {
16944 if (IsConstantSrc) {
16945 SmallVector<SDValue, 8> BuildVector;
16946 for (unsigned I = 0; I != NumStores; ++I) {
16947 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16948 SDValue Val = St->getValue();
16949 // If constant is of the wrong type, convert it now.
16950 if (MemVT != Val.getValueType()) {
16951 Val = peekThroughBitcasts(Val);
16952 // Deal with constants of wrong size.
16953 if (ElementSizeBits != Val.getValueSizeInBits()) {
16954 EVT IntMemVT =
16955 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16956 if (isa<ConstantFPSDNode>(Val)) {
16957 // Not clear how to truncate FP values.
16958 return false;
16959 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16960 Val = DAG.getConstant(C->getAPIntValue()
16961 .zextOrTrunc(Val.getValueSizeInBits())
16962 .zextOrTrunc(ElementSizeBits),
16963 SDLoc(C), IntMemVT);
16964 }
16965 // Make sure the value ends up with the correct type (bitcast to MemVT).
16966 Val = DAG.getBitcast(MemVT, Val);
16967 }
16968 BuildVector.push_back(Val);
16969 }
16970 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16971 : ISD::BUILD_VECTOR,
16972 DL, StoreTy, BuildVector);
16973 } else {
16974 SmallVector<SDValue, 8> Ops;
16975 for (unsigned i = 0; i < NumStores; ++i) {
16976 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16977 SDValue Val = peekThroughBitcasts(St->getValue());
16978 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16979 // type MemVT. If the underlying value is not the correct
16980 // type, but it is an extraction of an appropriate vector we
16981 // can recast Val to be of the correct type. This may require
16982 // converting between EXTRACT_VECTOR_ELT and
16983 // EXTRACT_SUBVECTOR.
16984 if ((MemVT != Val.getValueType()) &&
16985 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
16986 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16987 EVT MemVTScalarTy = MemVT.getScalarType();
16988 // We may need to add a bitcast here to get types to line up.
16989 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16990 Val = DAG.getBitcast(MemVT, Val);
16991 } else {
16992 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16993 : ISD::EXTRACT_VECTOR_ELT;
16994 SDValue Vec = Val.getOperand(0);
16995 SDValue Idx = Val.getOperand(1);
16996 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16997 }
16998 }
16999 Ops.push_back(Val);
17000 }
17001
17002 // Build the extracted vector elements back into a vector.
17003 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17004 : ISD::BUILD_VECTOR,
17005 DL, StoreTy, Ops);
17006 }
17007 } else {
17008 // We should always use a vector store when merging extracted vector
17009 // elements, so this path implies a store of constants.
17010 assert(IsConstantSrc && "Merged vector elements should use vector store")((void)0);
17011
17012 APInt StoreInt(SizeInBits, 0);
17013
17014 // Construct a single integer constant which is made of the smaller
17015 // constant inputs.
17016 bool IsLE = DAG.getDataLayout().isLittleEndian();
17017 for (unsigned i = 0; i < NumStores; ++i) {
17018 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17019 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17020
17021 SDValue Val = St->getValue();
17022 Val = peekThroughBitcasts(Val);
17023 StoreInt <<= ElementSizeBits;
17024 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17025 StoreInt |= C->getAPIntValue()
17026 .zextOrTrunc(ElementSizeBits)
17027 .zextOrTrunc(SizeInBits);
17028 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17029 StoreInt |= C->getValueAPF()
17030 .bitcastToAPInt()
17031 .zextOrTrunc(ElementSizeBits)
17032 .zextOrTrunc(SizeInBits);
17033 // If fp truncation is necessary give up for now.
17034 if (MemVT.getSizeInBits() != ElementSizeBits)
17035 return false;
17036 } else {
17037 llvm_unreachable("Invalid constant element type")__builtin_unreachable();
17038 }
17039 }
17040
17041 // Create the new Load and Store operations.
17042 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17043 }
17044
17045 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17046 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17047
17048 // Make sure we use a trunc store if it's necessary for legality.
17049 SDValue NewStore;
17050 if (!UseTrunc) {
17051 NewStore =
17052 DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17053 FirstInChain->getPointerInfo(), FirstInChain->getAlign());
17054 } else { // Must be realized as a trunc store
17055 EVT LegalizedStoredValTy =
17056 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17057 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17058 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17059 SDValue ExtendedStoreVal =
17060 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17061 LegalizedStoredValTy);
17062 NewStore = DAG.getTruncStore(
17063 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17064 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17065 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17066 }
17067
17068 // Replace all merged stores with the new store.
17069 for (unsigned i = 0; i < NumStores; ++i)
17070 CombineTo(StoreNodes[i].MemNode, NewStore);
17071
17072 AddToWorklist(NewChain.getNode());
17073 return true;
17074}
17075
17076void DAGCombiner::getStoreMergeCandidates(
17077 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17078 SDNode *&RootNode) {
17079 // This holds the base pointer, index, and the offset in bytes from the base
17080 // pointer. We must have a base and an offset. Do not handle stores to undef
17081 // base pointers.
17082 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17083 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17084 return;
17085
17086 SDValue Val = peekThroughBitcasts(St->getValue());
17087 StoreSource StoreSrc = getStoreSource(Val);
17088 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17089
17090 // Match on the load's base pointer if relevant.
17091 EVT MemVT = St->getMemoryVT();
17092 BaseIndexOffset LBasePtr;
17093 EVT LoadVT;
17094 if (StoreSrc == StoreSource::Load) {
17095 auto *Ld = cast<LoadSDNode>(Val);
17096 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17097 LoadVT = Ld->getMemoryVT();
17098 // Load and store should be the same type.
17099 if (MemVT != LoadVT)
17100 return;
17101 // Loads must only have one use.
17102 if (!Ld->hasNUsesOfValue(1, 0))
17103 return;
17104 // The memory operands must not be volatile/indexed/atomic.
17105 // TODO: May be able to relax for unordered atomics (see D66309)
17106 if (!Ld->isSimple() || Ld->isIndexed())
17107 return;
17108 }
17109 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17110 int64_t &Offset) -> bool {
17111 // The memory operands must not be volatile/indexed/atomic.
17112 // TODO: May be able to relax for unordered atomics (see D66309)
17113 if (!Other->isSimple() || Other->isIndexed())
17114 return false;
17115 // Don't mix temporal stores with non-temporal stores.
17116 if (St->isNonTemporal() != Other->isNonTemporal())
17117 return false;
17118 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17119 // Allow merging constants of different types as integers.
17120 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17121 : Other->getMemoryVT() != MemVT;
17122 switch (StoreSrc) {
17123 case StoreSource::Load: {
17124 if (NoTypeMatch)
17125 return false;
17126 // The Load's Base Ptr must also match.
17127 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17128 if (!OtherLd)
17129 return false;
17130 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17131 if (LoadVT != OtherLd->getMemoryVT())
17132 return false;
17133 // Loads must only have one use.
17134 if (!OtherLd->hasNUsesOfValue(1, 0))
17135 return false;
17136 // The memory operands must not be volatile/indexed/atomic.
17137 // TODO: May be able to relax for unordered atomics (see D66309)
17138 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17139 return false;
17140 // Don't mix temporal loads with non-temporal loads.
17141 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17142 return false;
17143 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17144 return false;
17145 break;
17146 }
17147 case StoreSource::Constant:
17148 if (NoTypeMatch)
17149 return false;
17150 if (!isIntOrFPConstant(OtherBC))
17151 return false;
17152 break;
17153 case StoreSource::Extract:
17154 // Do not merge truncated stores here.
17155 if (Other->isTruncatingStore())
17156 return false;
17157 if (!MemVT.bitsEq(OtherBC.getValueType()))
17158 return false;
17159 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17160 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17161 return false;
17162 break;
17163 default:
17164 llvm_unreachable("Unhandled store source for merging");
17165 }
17166 Ptr = BaseIndexOffset::match(Other, DAG);
17167 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17168 };
17169
17170 // Check whether the pair of StoreNode and RootNode has already bailed out
17171 // of the dependence check more times than the limit allows.
17172 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17173 SDNode *RootNode) -> bool {
17174 auto RootCount = StoreRootCountMap.find(StoreNode);
17175 return RootCount != StoreRootCountMap.end() &&
17176 RootCount->second.first == RootNode &&
17177 RootCount->second.second > StoreMergeDependenceLimit;
17178 };
17179
17180 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17181 // This must be a chain use.
17182 if (UseIter.getOperandNo() != 0)
17183 return;
17184 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17185 BaseIndexOffset Ptr;
17186 int64_t PtrDiff;
17187 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17188 !OverLimitInDependenceCheck(OtherStore, RootNode))
17189 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17190 }
17191 };
17192
17193 // We are looking for a root node which is an ancestor to all mergeable
17194 // stores. We search up through a load, to our root and then down
17195 // through all children. For instance we will find Store{1,2,3} if
17196 // St is Store1, Store2 or Store3 where the root is not a load,
17197 // which is always true for nonvolatile ops. TODO: Expand
17198 // the search to find all valid candidates through multiple layers of loads.
17199 //
17200 // Root
17201 // |-------|-------|
17202 // Load Load Store3
17203 // | |
17204 // Store1 Store2
17205 //
17206 // FIXME: We should be able to climb and
17207 // descend TokenFactors to find candidates as well.
17208
17209 RootNode = St->getChain().getNode();
17210
17211 unsigned NumNodesExplored = 0;
17212 const unsigned MaxSearchNodes = 1024;
17213 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17214 RootNode = Ldn->getChain().getNode();
17215 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17216 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17217 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17218 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17219 TryToAddCandidate(I2);
17220 }
17221 }
17222 } else {
17223 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17224 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17225 TryToAddCandidate(I);
17226 }
17227}
17228
17229// We need to check that merging these stores does not cause a loop in
17230// the DAG. Any store candidate may depend on another candidate
17231// indirectly through its operand (we already consider dependencies
17232// through the chain). Check in parallel by searching up from
17233// non-chain operands of candidates.
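// For example, if one candidate stores a value that is loaded from memory and
// that load is chained after another candidate store, merging the two would
// make the combined store depend on itself through that load, i.e. create a
// cycle.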
17234bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17235 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17236 SDNode *RootNode) {
17237 // FIXME: We should be able to truncate a full search of
17238 // predecessors by doing a BFS and keeping tabs on the originating
17239 // stores from which worklist nodes come, in a similar way to
17240 // TokenFactor simplification.
17241
17242 SmallPtrSet<const SDNode *, 32> Visited;
17243 SmallVector<const SDNode *, 8> Worklist;
17244
17245 // RootNode is a predecessor to all candidates so we need not search
17246 // past it. Add RootNode (peeking through TokenFactors). Do not count
17247 // these towards size check.
17248
17249 Worklist.push_back(RootNode);
17250 while (!Worklist.empty()) {
17251 auto N = Worklist.pop_back_val();
17252 if (!Visited.insert(N).second)
17253 continue; // Already present in Visited.
17254 if (N->getOpcode() == ISD::TokenFactor) {
17255 for (SDValue Op : N->ops())
17256 Worklist.push_back(Op.getNode());
17257 }
17258 }
17259
17260 // Don't count pruning nodes towards max.
17261 unsigned int Max = 1024 + Visited.size();
17262 // Search Ops of store candidates.
17263 for (unsigned i = 0; i < NumStores; ++i) {
17264 SDNode *N = StoreNodes[i].MemNode;
17265 // Of the 4 Store Operands:
17266 // * Chain (Op 0) -> We have already considered these
17267 // in candidate selection and can be
17268 // safely ignored
17269 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17270 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17271 // but aren't necessarily from the same base node, so
17272 // cycles are possible (e.g. via an indexed store).
17273 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17274 // non-indexed stores). Not constant on all targets (e.g. ARM)
17275 // and so can participate in a cycle.
17276 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17277 Worklist.push_back(N->getOperand(j).getNode());
17278 }
17279 // Search through DAG. We can stop early if we find a store node.
17280 for (unsigned i = 0; i < NumStores; ++i)
17281 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17282 Max)) {
17283 // If the search bails out, record the StoreNode and RootNode in the
17284 // StoreRootCountMap. If we have seen the pair many times over a limit,
17285 // we won't add the StoreNode into the StoreNodes set again.
17286 if (Visited.size() >= Max) {
17287 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17288 if (RootCount.first == RootNode)
17289 RootCount.second++;
17290 else
17291 RootCount = {RootNode, 1};
17292 }
17293 return false;
17294 }
17295 return true;
17296}
17297
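// For illustration: given candidate offsets {0, 4, 8, 16} with 4-byte
// elements, the routine below returns 3 for the consecutive run {0, 4, 8};
// the remaining entries stay in StoreNodes for later iterations.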
17298unsigned
17299DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17300 int64_t ElementSizeBytes) const {
17301 while (true) {
17302 // Find a store past the width of the first store.
17303 size_t StartIdx = 0;
17304 while ((StartIdx + 1 < StoreNodes.size()) &&
17305 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17306 StoreNodes[StartIdx + 1].OffsetFromBase)
17307 ++StartIdx;
17308
17309 // Bail if we don't have enough candidates to merge.
17310 if (StartIdx + 1 >= StoreNodes.size())
17311 return 0;
17312
17313 // Trim stores that overlapped with the first store.
17314 if (StartIdx)
17315 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17316
17317 // Scan the memory operations on the chain and find the first
17318 // non-consecutive store memory address.
17319 unsigned NumConsecutiveStores = 1;
17320 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17321 // Check that the addresses are consecutive starting from the second
17322 // element in the list of stores.
17323 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17324 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17325 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17326 break;
17327 NumConsecutiveStores = i + 1;
17328 }
17329 if (NumConsecutiveStores > 1)
17330 return NumConsecutiveStores;
17331
17332 // There are no consecutive stores at the start of the list.
17333 // Remove the first store and try again.
17334 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17335 }
17336}
17337
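// For illustration: two adjacent "store i32 0" operations can typically be
// merged into a single "store i64 0" (or a v2i32 zero store when vector
// stores are allowed and profitable for the target).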
17338bool DAGCombiner::tryStoreMergeOfConstants(
17339 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17340 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17341 LLVMContext &Context = *DAG.getContext();
17342 const DataLayout &DL = DAG.getDataLayout();
17343 int64_t ElementSizeBytes = MemVT.getStoreSize();
17344 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17345 bool MadeChange = false;
17346
17347 // Store the constants into memory as one consecutive store.
17348 while (NumConsecutiveStores >= 2) {
17349 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17350 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17351 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17352 unsigned LastLegalType = 1;
17353 unsigned LastLegalVectorType = 1;
17354 bool LastIntegerTrunc = false;
17355 bool NonZero = false;
17356 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17357 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17358 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17359 SDValue StoredVal = ST->getValue();
17360 bool IsElementZero = false;
17361 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17362 IsElementZero = C->isNullValue();
17363 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17364 IsElementZero = C->getConstantFPValue()->isNullValue();
17365 if (IsElementZero) {
17366 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17367 FirstZeroAfterNonZero = i;
17368 }
17369 NonZero |= !IsElementZero;
17370
17371 // Find a legal type for the constant store.
17372 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17373 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17374 bool IsFast = false;
17375
17376 // Break early when size is too large to be legal.
17377 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17378 break;
17379
17380 if (TLI.isTypeLegal(StoreTy) &&
17381 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17382 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17383 *FirstInChain->getMemOperand(), &IsFast) &&
17384 IsFast) {
17385 LastIntegerTrunc = false;
17386 LastLegalType = i + 1;
17387 // Or check whether a truncstore is legal.
17388 } else if (TLI.getTypeAction(Context, StoreTy) ==
17389 TargetLowering::TypePromoteInteger) {
17390 EVT LegalizedStoredValTy =
17391 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17392 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17393 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17394 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17395 *FirstInChain->getMemOperand(), &IsFast) &&
17396 IsFast) {
17397 LastIntegerTrunc = true;
17398 LastLegalType = i + 1;
17399 }
17400 }
17401
17402 // We only use vectors if the constant is known to be zero or the
17403 // target allows it and the function is not marked with the
17404 // noimplicitfloat attribute.
17405 if ((!NonZero ||
17406 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17407 AllowVectors) {
17408 // Find a legal type for the vector store.
17409 unsigned Elts = (i + 1) * NumMemElts;
17410 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17411 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17412 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17413 TLI.allowsMemoryAccess(Context, DL, Ty,
17414 *FirstInChain->getMemOperand(), &IsFast) &&
17415 IsFast)
17416 LastLegalVectorType = i + 1;
17417 }
17418 }
17419
17420 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17421 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17422 bool UseTrunc = LastIntegerTrunc && !UseVector;
17423
17424 // Check if we found a legal integer type that creates a meaningful
17425 // merge.
17426 if (NumElem < 2) {
17427 // We know that candidate stores are in order and of correct
17428 // shape. While there is no mergeable sequence from the
17429 // beginning, one may start later in the sequence. The only
17430 // reason a merge of size N could have failed where another of
17431 // the same size would not have, is if the alignment has
17432 // improved or we've dropped a non-zero value. Drop as many
17433 // candidates as we can here.
17434 unsigned NumSkip = 1;
17435 while ((NumSkip < NumConsecutiveStores) &&
17436 (NumSkip < FirstZeroAfterNonZero) &&
17437 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17438 NumSkip++;
17439
17440 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17441 NumConsecutiveStores -= NumSkip;
17442 continue;
17443 }
17444
17445 // Check that we can merge these candidates without causing a cycle.
17446 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17447 RootNode)) {
17448 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17449 NumConsecutiveStores -= NumElem;
17450 continue;
17451 }
17452
17453 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17454 /*IsConstantSrc*/ true,
17455 UseVector, UseTrunc);
17456
17457 // Remove merged stores for next iteration.
17458 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17459 NumConsecutiveStores -= NumElem;
17460 }
17461 return MadeChange;
17462}
17463
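// For illustration: storing extractelement(%v, 0) to p and
// extractelement(%v, 1) to p+4 can be rewritten as one two-element vector
// store whose value is rebuilt from the extracted elements (via
// BUILD_VECTOR or CONCAT_VECTORS in mergeStoresOfConstantsOrVecElts).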
17464bool DAGCombiner::tryStoreMergeOfExtracts(
17465 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17466 EVT MemVT, SDNode *RootNode) {
17467 LLVMContext &Context = *DAG.getContext();
17468 const DataLayout &DL = DAG.getDataLayout();
17469 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17470 bool MadeChange = false;
17471
17472 // Loop over the consecutive stores while enough candidates remain.
17473 while (NumConsecutiveStores >= 2) {
17474 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17475 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17476 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17477 unsigned NumStoresToMerge = 1;
17478 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17479 // Find a legal type for the vector store.
17480 unsigned Elts = (i + 1) * NumMemElts;
17481 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17482 bool IsFast = false;
17483
17484 // Break early when size is too large to be legal.
17485 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17486 break;
17487
17488 if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17489 TLI.allowsMemoryAccess(Context, DL, Ty,
17490 *FirstInChain->getMemOperand(), &IsFast) &&
17491 IsFast)
17492 NumStoresToMerge = i + 1;
17493 }
17494
17495 // Check if we found a legal vector type that creates a meaningful
17496 // merge.
17497 if (NumStoresToMerge < 2) {
17498 // We know that candidate stores are in order and of correct
17499 // shape. While there is no mergeable sequence from the
17500 // beginning, one may start later in the sequence. The only
17501 // reason a merge of size N could have failed where another of
17502 // the same size would not have, is if the alignment has
17503 // improved. Drop as many candidates as we can here.
17504 unsigned NumSkip = 1;
17505 while ((NumSkip < NumConsecutiveStores) &&
17506 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17507 NumSkip++;
17508
17509 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17510 NumConsecutiveStores -= NumSkip;
17511 continue;
17512 }
17513
17514 // Check that we can merge these candidates without causing a cycle.
17515 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17516 RootNode)) {
17517 StoreNodes.erase(StoreNodes.begin(),
17518 StoreNodes.begin() + NumStoresToMerge);
17519 NumConsecutiveStores -= NumStoresToMerge;
17520 continue;
17521 }
17522
17523 MadeChange |= mergeStoresOfConstantsOrVecElts(
17524 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17525 /*UseVector*/ true, /*UseTrunc*/ false);
17526
17527 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17528 NumConsecutiveStores -= NumStoresToMerge;
17529 }
17530 return MadeChange;
17531}
17532
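// For illustration: a memcpy-like pattern of four consecutive i8 loads
// feeding four consecutive i8 stores can be rewritten as a single i32 (or
// v4i8) load followed by a single i32 (or v4i8) store, provided the wide
// access is legal and fast for the target.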
17533bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17534 unsigned NumConsecutiveStores, EVT MemVT,
17535 SDNode *RootNode, bool AllowVectors,
17536 bool IsNonTemporalStore,
17537 bool IsNonTemporalLoad) {
17538 LLVMContext &Context = *DAG.getContext();
17539 const DataLayout &DL = DAG.getDataLayout();
17540 int64_t ElementSizeBytes = MemVT.getStoreSize();
17541 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17542 bool MadeChange = false;
17543
17544 // Look for load nodes which are used by the stored values.
17545 SmallVector<MemOpLink, 8> LoadNodes;
17546
17547 // Find acceptable loads. Loads need to have the same chain (token factor),
17548 // must not be zext, volatile, indexed, and they must be consecutive.
17549 BaseIndexOffset LdBasePtr;
17550
17551 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17552 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17553 SDValue Val = peekThroughBitcasts(St->getValue());
17554 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17555
17556 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17557 // If this is not the first ptr that we check.
17558 int64_t LdOffset = 0;
17559 if (LdBasePtr.getBase().getNode()) {
17560 // The base ptr must be the same.
17561 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17562 break;
17563 } else {
17564 // Check that all other base pointers are the same as this one.
17565 LdBasePtr = LdPtr;
17566 }
17567
17568 // We found a potential memory operand to merge.
17569 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17570 }
17571
17572 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17573 Align RequiredAlignment;
17574 bool NeedRotate = false;
17575 if (LoadNodes.size() == 2) {
17576 // If we have load/store pair instructions and we only have two values,
17577 // don't bother merging.
17578 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17579 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17580 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17581 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17582 break;
17583 }
17584 // If the loads are reversed, see if we can rotate the halves into place.
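// For example, if the i32 stores at p and p+4 take their values from i32
// loads at q+4 and q respectively, a single i64 load at q rotated by 32 bits
// can feed a single i64 store at p.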
17585 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17586 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17587 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17588 if (Offset0 - Offset1 == ElementSizeBytes &&
17589 (hasOperation(ISD::ROTL, PairVT) ||
17590 hasOperation(ISD::ROTR, PairVT))) {
17591 std::swap(LoadNodes[0], LoadNodes[1]);
17592 NeedRotate = true;
17593 }
17594 }
17595 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17596 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17597 Align FirstStoreAlign = FirstInChain->getAlign();
17598 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17599
17600 // Scan the memory operations on the chain and find the first
17601 // non-consecutive load memory address. These variables hold the index in
17602 // the store node array.
17603
17604 unsigned LastConsecutiveLoad = 1;
17605
17606 // These variables refer to the size and not the index in the array.
17607 unsigned LastLegalVectorType = 1;
17608 unsigned LastLegalIntegerType = 1;
17609 bool isDereferenceable = true;
17610 bool DoIntegerTruncate = false;
17611 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17612 SDValue LoadChain = FirstLoad->getChain();
17613 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17614 // All loads must share the same chain.
17615 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17616 break;
17617
17618 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17619 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17620 break;
17621 LastConsecutiveLoad = i;
17622
17623 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17624 isDereferenceable = false;
17625
17626 // Find a legal type for the vector store.
17627 unsigned Elts = (i + 1) * NumMemElts;
17628 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17629
17630 // Break early when size is too large to be legal.
17631 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17632 break;
17633
17634 bool IsFastSt = false;
17635 bool IsFastLd = false;
17636 if (TLI.isTypeLegal(StoreTy) &&
17637 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17638 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17639 *FirstInChain->getMemOperand(), &IsFastSt) &&
17640 IsFastSt &&
17641 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17642 *FirstLoad->getMemOperand(), &IsFastLd) &&
17643 IsFastLd) {
17644 LastLegalVectorType = i + 1;
17645 }
17646
17647 // Find a legal type for the integer store.
17648 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17649 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17650 if (TLI.isTypeLegal(StoreTy) &&
17651 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17652 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17653 *FirstInChain->getMemOperand(), &IsFastSt) &&
17654 IsFastSt &&
17655 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17656 *FirstLoad->getMemOperand(), &IsFastLd) &&
17657 IsFastLd) {
17658 LastLegalIntegerType = i + 1;
17659 DoIntegerTruncate = false;
17660 // Or check whether a truncstore and extload is legal.
17661 } else if (TLI.getTypeAction(Context, StoreTy) ==
17662 TargetLowering::TypePromoteInteger) {
17663 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17664 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17665 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17666 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17667 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17668 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17669 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17670 *FirstInChain->getMemOperand(), &IsFastSt) &&
17671 IsFastSt &&
17672 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17673 *FirstLoad->getMemOperand(), &IsFastLd) &&
17674 IsFastLd) {
17675 LastLegalIntegerType = i + 1;
17676 DoIntegerTruncate = true;
17677 }
17678 }
17679 }
17680
17681 // Only use vector types if the vector type is larger than the integer
17682 // type. If they are the same, use integers.
17683 bool UseVectorTy =
17684 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17685 unsigned LastLegalType =
17686 std::max(LastLegalVectorType, LastLegalIntegerType);
17687
17688 // We add +1 here because the LastXXX variables refer to an array index
17689 // while NumElem refers to a count of elements.
17690 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17691 NumElem = std::min(LastLegalType, NumElem);
17692 Align FirstLoadAlign = FirstLoad->getAlign();
17693
17694 if (NumElem < 2) {
17695 // We know that candidate stores are in order and of correct
17696 // shape. While there is no mergeable sequence from the
17697 // beginning, one may start later in the sequence. The only
17698 // reason a merge of size N could have failed where another of
17699 // the same size would not have is if the alignment of either
17700 // the load or store has improved. Drop as many candidates as we
17701 // can here.
17702 unsigned NumSkip = 1;
17703 while ((NumSkip < LoadNodes.size()) &&
17704 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17705 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17706 NumSkip++;
17707 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17708 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17709 NumConsecutiveStores -= NumSkip;
17710 continue;
17711 }
17712
17713 // Check that we can merge these candidates without causing a cycle.
17714 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17715 RootNode)) {
17716 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17717 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17718 NumConsecutiveStores -= NumElem;
17719 continue;
17720 }
17721
17722 // Find if it is better to use vectors or integers to load and store
17723 // to memory.
17724 EVT JointMemOpVT;
17725 if (UseVectorTy) {
17726 // Find a legal type for the vector store.
17727 unsigned Elts = NumElem * NumMemElts;
17728 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17729 } else {
17730 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17731 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17732 }
17733
17734 SDLoc LoadDL(LoadNodes[0].MemNode);
17735 SDLoc StoreDL(StoreNodes[0].MemNode);
17736
17737 // The merged loads are required to have the same incoming chain, so
17738 // using the first's chain is acceptable.
17739
17740 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17741 AddToWorklist(NewStoreChain.getNode());
17742
17743 MachineMemOperand::Flags LdMMOFlags =
17744 isDereferenceable ? MachineMemOperand::MODereferenceable
17745 : MachineMemOperand::MONone;
17746 if (IsNonTemporalLoad)
17747 LdMMOFlags |= MachineMemOperand::MONonTemporal;
17748
17749 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17750 ? MachineMemOperand::MONonTemporal
17751 : MachineMemOperand::MONone;
17752
17753 SDValue NewLoad, NewStore;
17754 if (UseVectorTy || !DoIntegerTruncate) {
17755 NewLoad = DAG.getLoad(
17756 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17757 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17758 SDValue StoreOp = NewLoad;
17759 if (NeedRotate) {
17760 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17761 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17762 "Unexpected type for rotate-able load pair");
17763 SDValue RotAmt =
17764 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17765 // Target can convert to the identical ROTR if it does not have ROTL.
17766 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17767 }
17768 NewStore = DAG.getStore(
17769 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17770 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17771 } else { // This must be the truncstore/extload case
17772 EVT ExtendedTy =
17773 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17774 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17775 FirstLoad->getChain(), FirstLoad->getBasePtr(),
17776 FirstLoad->getPointerInfo(), JointMemOpVT,
17777 FirstLoadAlign, LdMMOFlags);
17778 NewStore = DAG.getTruncStore(
17779 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17780 FirstInChain->getPointerInfo(), JointMemOpVT,
17781 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17782 }
17783
17784 // Transfer chain users from old loads to the new load.
17785 for (unsigned i = 0; i < NumElem; ++i) {
17786 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17787 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17788 SDValue(NewLoad.getNode(), 1));
17789 }
17790
17791 // Replace all stores with the new store. Recursively remove corresponding
17792 // values if they are no longer used.
17793 for (unsigned i = 0; i < NumElem; ++i) {
17794 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17795 CombineTo(StoreNodes[i].MemNode, NewStore);
17796 if (Val.getNode()->use_empty())
17797 recursivelyDeleteUnusedNodes(Val.getNode());
17798 }
17799
17800 MadeChange = true;
17801 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17802 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17803 NumConsecutiveStores -= NumElem;
17804 }
17805 return MadeChange;
17806}
17807
17808bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17809 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17810 return false;
17811
17812 // TODO: Extend this function to merge stores of scalable vectors.
17813 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17814 // store since we know <vscale x 16 x i8> is exactly twice as large as
17815 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17816 EVT MemVT = St->getMemoryVT();
17817 if (MemVT.isScalableVector())
17818 return false;
17819 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17820 return false;
17821
17822 // This function cannot currently deal with non-byte-sized memory sizes.
17823 int64_t ElementSizeBytes = MemVT.getStoreSize();
17824 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17825 return false;
17826
17827 // Do not bother looking at stored values that are not constants, loads, or
17828 // extracted vector elements.
17829 SDValue StoredVal = peekThroughBitcasts(St->getValue());
17830 const StoreSource StoreSrc = getStoreSource(StoredVal);
17831 if (StoreSrc == StoreSource::Unknown)
17832 return false;
17833
17834 SmallVector<MemOpLink, 8> StoreNodes;
17835 SDNode *RootNode;
17836 // Find potential store merge candidates by searching through the chain sub-DAG.
17837 getStoreMergeCandidates(St, StoreNodes, RootNode);
17838
17839 // Check if there is anything to merge.
17840 if (StoreNodes.size() < 2)
17841 return false;
17842
17843 // Sort the memory operands according to their distance from the
17844 // base pointer.
17845 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17846 return LHS.OffsetFromBase < RHS.OffsetFromBase;
17847 });
17848
17849 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17850 Attribute::NoImplicitFloat);
17851 bool IsNonTemporalStore = St->isNonTemporal();
17852 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17853 cast<LoadSDNode>(StoredVal)->isNonTemporal();
17854
17855 // Store merging attempts to merge the lowest stores first. This generally
17856 // works out well: once a merge succeeds, the remaining stores are checked
17857 // after the first collection of stores has been merged. However, in the
17858 // case that a non-mergeable store is found first, e.g., {p[-2],
17859 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17860 // mergeable cases. To prevent this, we prune such stores from the
17861 // front of StoreNodes here.
17862 bool MadeChange = false;
17863 while (StoreNodes.size() > 1) {
17864 unsigned NumConsecutiveStores =
17865 getConsecutiveStores(StoreNodes, ElementSizeBytes);
17866 // There are no more stores in the list to examine.
17867 if (NumConsecutiveStores == 0)
17868 return MadeChange;
17869
17870 // We have at least 2 consecutive stores. Try to merge them.
17871 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17872 switch (StoreSrc) {
17873 case StoreSource::Constant:
17874 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17875 MemVT, RootNode, AllowVectors);
17876 break;
17877
17878 case StoreSource::Extract:
17879 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17880 MemVT, RootNode);
17881 break;
17882
17883 case StoreSource::Load:
17884 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17885 MemVT, RootNode, AllowVectors,
17886 IsNonTemporalStore, IsNonTemporalLoad);
17887 break;
17888
17889 default:
17890 llvm_unreachable("Unhandled store source type");
17891 }
17892 }
17893 return MadeChange;
17894}
17895
17896SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17897 SDLoc SL(ST);
17898 SDValue ReplStore;
17899
17900 // Replace the chain to avoid dependency.
17901 if (ST->isTruncatingStore()) {
17902 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17903 ST->getBasePtr(), ST->getMemoryVT(),
17904 ST->getMemOperand());
17905 } else {
17906 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17907 ST->getMemOperand());
17908 }
17909
17910 // Create token to keep both nodes around.
17911 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17912 MVT::Other, ST->getChain(), ReplStore);
17913
17914 // Make sure the new and old chains are cleaned up.
17915 AddToWorklist(Token.getNode());
17916
17917 // Don't add users to work list.
17918 return CombineTo(ST, Token, false);
17919}
17920
17921SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17922 SDValue Value = ST->getValue();
17923 if (Value.getOpcode() == ISD::TargetConstantFP)
17924 return SDValue();
17925
17926 if (!ISD::isNormalStore(ST))
17927 return SDValue();
17928
17929 SDLoc DL(ST);
17930
17931 SDValue Chain = ST->getChain();
17932 SDValue Ptr = ST->getBasePtr();
17933
17934 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17935
17936 // NOTE: If the original store is volatile, this transform must not increase
17937 // the number of stores. For example, on x86-32 an f64 can be stored in one
17938 // processor operation but an i64 (which is not legal) requires two. So the
17939 // transform should not be done in this case.
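// For example, "store float 1.0" can instead store the i32 bit pattern
// 0x3F800000, which avoids materializing a floating-point immediate when an
// integer store is cheaper.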
17940
17941 SDValue Tmp;
17942 switch (CFP->getSimpleValueType(0).SimpleTy) {
17943 default:
17944 llvm_unreachable("Unknown FP type");
17945 case MVT::f16: // We don't do this for these yet.
17946 case MVT::f80:
17947 case MVT::f128:
17948 case MVT::ppcf128:
17949 return SDValue();
17950 case MVT::f32:
17951 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17952 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17953 ;
17954 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17955 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17956 MVT::i32);
17957 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17958 }
17959
17960 return SDValue();
17961 case MVT::f64:
17962 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17963 ST->isSimple()) ||
17964 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17965 ;
17966 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17967 getZExtValue(), SDLoc(CFP), MVT::i64);
17968 return DAG.getStore(Chain, DL, Tmp,
17969 Ptr, ST->getMemOperand());
17970 }
17971
17972 if (ST->isSimple() &&
17973 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17974 // Many FP stores are not made apparent until after legalize, e.g. for
17975 // argument passing. Since this is so common, custom legalize the
17976 // 64-bit integer store into two 32-bit stores.
17977 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17978 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17979 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
17980 if (DAG.getDataLayout().isBigEndian())
17981 std::swap(Lo, Hi);
17982
17983 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17984 AAMDNodes AAInfo = ST->getAAInfo();
17985
17986 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17987 ST->getOriginalAlign(), MMOFlags, AAInfo);
17988 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17989 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17990 ST->getPointerInfo().getWithOffset(4),
17991 ST->getOriginalAlign(), MMOFlags, AAInfo);
17992 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17993 St0, St1);
17994 }
17995
17996 return SDValue();
17997 }
17998}
17999
18000SDValue DAGCombiner::visitSTORE(SDNode *N) {
18001 StoreSDNode *ST = cast<StoreSDNode>(N);
18002 SDValue Chain = ST->getChain();
18003 SDValue Value = ST->getValue();
18004 SDValue Ptr = ST->getBasePtr();
18005
18006 // If this is a store of a bit convert, store the input value if the
18007 // resultant store does not need a higher alignment than the original.
18008 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18009 ST->isUnindexed()) {
18010 EVT SVT = Value.getOperand(0).getValueType();
18011 // If the store is volatile, we only want to change the store type if the
18012 // resulting store is legal. Otherwise we might increase the number of
18013 // memory accesses. We don't care if the original type was legal or not
18014 // as we assume software couldn't rely on the number of accesses of an
18015 // illegal type.
18016 // TODO: May be able to relax for unordered atomics (see D66309)
18017 if (((!LegalOperations && ST->isSimple()) ||
18018 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18019 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18020 DAG, *ST->getMemOperand())) {
18021 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18022 ST->getMemOperand());
18023 }
18024 }
18025
18026 // Turn 'store undef, Ptr' -> nothing.
18027 if (Value.isUndef() && ST->isUnindexed())
18028 return Chain;
18029
18030 // Try to infer better alignment information than the store already has.
18031 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18032 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18033 if (*Alignment > ST->getAlign() &&
18034 isAligned(*Alignment, ST->getSrcValueOffset())) {
18035 SDValue NewStore =
18036 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18037 ST->getMemoryVT(), *Alignment,
18038 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18039 // NewStore will always be N as we are only refining the alignment
18040 assert(NewStore.getNode() == N);
18041 (void)NewStore;
18042 }
18043 }
18044 }
18045
18046 // Try transforming a pair of floating-point load / store ops into
18047 // integer load / store ops.
18048 if (SDValue NewST = TransformFPLoadStorePair(N))
18049 return NewST;
18050
18051 // Try transforming several stores into STORE (BSWAP).
18052 if (SDValue Store = mergeTruncStores(ST))
18053 return Store;
18054
18055 if (ST->isUnindexed()) {
18056 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18057 // adjacent stores.
18058 if (findBetterNeighborChains(ST)) {
18059 // replaceStoreChain uses CombineTo, which handled all of the worklist
18060 // manipulation. Return the original node to not do anything else.
18061 return SDValue(ST, 0);
18062 }
18063 Chain = ST->getChain();
18064 }
18065
18066 // FIXME: is there such a thing as a truncating indexed store?
18067 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18068 Value.getValueType().isInteger() &&
18069 (!isa<ConstantSDNode>(Value) ||
18070 !cast<ConstantSDNode>(Value)->isOpaque())) {
18071 APInt TruncDemandedBits =
18072 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18073 ST->getMemoryVT().getScalarSizeInBits());
18074
18075 // See if we can simplify the input to this truncstore with knowledge that
18076 // only the low bits are being used. For example:
18077 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18078 AddToWorklist(Value.getNode());
18079 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18080 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18081 ST->getMemOperand());
18082
18083 // Otherwise, see if we can simplify the operation with
18084 // SimplifyDemandedBits, which only works if the value has a single use.
18085 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18086 // Re-visit the store if anything changed and the store hasn't been merged
18087 // with another node (N is deleted). SimplifyDemandedBits will add Value's
18088 // node back to the worklist if necessary, but we also need to re-visit
18089 // the Store node itself.
18090 if (N->getOpcode() != ISD::DELETED_NODE)
18091 AddToWorklist(N);
18092 return SDValue(N, 0);
18093 }
18094 }
18095
18096 // If this is a load followed by a store to the same location, then the store
18097 // is dead/noop.
18098 // TODO: Can relax for unordered atomics (see D66309)
18099 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18100 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18101 ST->isUnindexed() && ST->isSimple() &&
18102 Ld->getAddressSpace() == ST->getAddressSpace() &&
18103 // There can't be any side effects between the load and store, such as
18104 // a call or store.
18105 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18106 // The store is dead, remove it.
18107 return Chain;
18108 }
18109 }
18110
18111 // TODO: Can relax for unordered atomics (see D66309)
18112 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18113 if (ST->isUnindexed() && ST->isSimple() &&
18114 ST1->isUnindexed() && ST1->isSimple()) {
18115 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18116 ST->getMemoryVT() == ST1->getMemoryVT() &&
18117 ST->getAddressSpace() == ST1->getAddressSpace()) {
18118 // If this is a store followed by a store with the same value to the
18119 // same location, then the store is dead/noop.
18120 return Chain;
18121 }
18122
18123 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18124 !ST1->getBasePtr().isUndef() &&
18125 // BaseIndexOffset and the code below requires knowing the size
18126 // of a vector, so bail out if MemoryVT is scalable.
18127 !ST->getMemoryVT().isScalableVector() &&
18128 !ST1->getMemoryVT().isScalableVector() &&
18129 ST->getAddressSpace() == ST1->getAddressSpace()) {
18130 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18131 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18132 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18133 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18134 // If the preceding store writes to a subset of the current store's
18135 // location and no other node is chained to that store, we can
18136 // effectively drop it. Do not remove stores to undef as they may
18137 // be used as data sinks.
18138 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18139 CombineTo(ST1, ST1->getChain());
18140 return SDValue();
18141 }
18142 }
18143 }
18144 }
18145
18146 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18147 // truncating store. We can do this even if this is already a truncstore.
18148 if ((Value.getOpcode() == ISD::FP_ROUND ||
18149 Value.getOpcode() == ISD::TRUNCATE) &&
18150 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18151 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18152 ST->getMemoryVT(), LegalOperations)) {
18153 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18154 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18155 }
18156
18157 // Always perform this optimization before types are legal. If the target
18158 // prefers, also try this after legalization to catch stores that were created
18159 // by intrinsics or other nodes.
18160 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18161 while (true) {
18162 // There can be multiple store sequences on the same chain.
18163 // Keep trying to merge store sequences until we are unable to do so
18164 // or until we merge the last store on the chain.
18165 bool Changed = mergeConsecutiveStores(ST);
18166 if (!Changed) break;
18167 // Return N, as the merge only uses CombineTo and no worklist
18168 // cleanup is necessary.
18169 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18170 return SDValue(N, 0);
18171 }
18172 }
18173
18174 // Try transforming N to an indexed store.
18175 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18176 return SDValue(N, 0);
18177
18178 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18179 //
18180 // Make sure to do this only after attempting to merge stores in order to
18181 // avoid changing the types of some subset of stores due to visit order,
18182 // preventing their merging.
18183 if (isa<ConstantFPSDNode>(ST->getValue())) {
18184 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18185 return NewSt;
18186 }
18187
18188 if (SDValue NewSt = splitMergedValStore(ST))
18189 return NewSt;
18190
18191 return ReduceLoadOpStoreWidth(N);
18192}
18193
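// A store whose bytes all lie within an object that is about to be marked
// dead by LIFETIME_END can be removed, since nothing may legally read those
// bytes afterwards; visitLIFETIME_END below looks for such stores on the
// incoming chain.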
18194SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18195 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18196 if (!LifetimeEnd->hasOffset())
18197 return SDValue();
18198
18199 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18200 LifetimeEnd->getOffset(), false);
18201
18202 // We walk up the chains to find stores.
18203 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18204 while (!Chains.empty()) {
18205 SDValue Chain = Chains.pop_back_val();
18206 if (!Chain.hasOneUse())
18207 continue;
18208 switch (Chain.getOpcode()) {
18209 case ISD::TokenFactor:
18210 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18211 Chains.push_back(Chain.getOperand(--Nops));
18212 break;
18213 case ISD::LIFETIME_START:
18214 case ISD::LIFETIME_END:
18215 // We can forward past any lifetime start/end that can be proven not to
18216 // alias the node.
18217 if (!isAlias(Chain.getNode(), N))
18218 Chains.push_back(Chain.getOperand(0));
18219 break;
18220 case ISD::STORE: {
18221 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18222 // TODO: Can relax for unordered atomics (see D66309)
18223 if (!ST->isSimple() || ST->isIndexed())
18224 continue;
18225 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18226 // The bounds of a scalable store are not known until runtime, so this
18227 // store cannot be elided.
18228 if (StoreSize.isScalable())
18229 continue;
18230 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18231 // If we store purely within object bounds just before its lifetime ends,
18232 // we can remove the store.
18233 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18234 StoreSize.getFixedSize() * 8)) {
18235 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18236 dbgs() << "\nwithin LIFETIME_END of : ";
18237 LifetimeEndBase.dump(); dbgs() << "\n");
18238 CombineTo(ST, ST->getChain());
18239 return SDValue(N, 0);
18240 }
18241 }
18242 }
18243 }
18244 return SDValue();
18245}
18246
18247/// For the instruction sequence of store below, F and I values
18248/// are bundled together as an i64 value before being stored into memory.
18249/// Sometimes it is more efficient to generate separate stores for F and I,
18250/// which can remove the bitwise instructions or sink them to colder places.
18251///
18252/// (store (or (zext (bitcast F to i32) to i64),
18253/// (shl (zext I to i64), 32)), addr) -->
18254/// (store F, addr) and (store I, addr+4)
18255///
18256/// Similarly, splitting for other merged store can also be beneficial, like:
18257/// For pair of {i32, i32}, i64 store --> two i32 stores.
18258/// For pair of {i32, i16}, i64 store --> two i32 stores.
18259/// For pair of {i16, i16}, i32 store --> two i16 stores.
18260/// For pair of {i16, i8}, i32 store --> two i16 stores.
18261/// For pair of {i8, i8}, i16 store --> two i8 stores.
18262///
18263/// We allow each target to determine specifically which kind of splitting is
18264/// supported.
18265///
18266/// The store patterns are commonly seen from the simple code snippet below
18267/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
18268/// void goo(const std::pair<int, float> &);
18269/// hoo() {
18270/// ...
18271/// goo(std::make_pair(tmp, ftmp));
18272/// ...
18273/// }
18274///
18275SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18276 if (OptLevel == CodeGenOpt::None)
18277 return SDValue();
18278
18279 // Can't change the number of memory accesses for a volatile store or break
18280 // atomicity for an atomic one.
18281 if (!ST->isSimple())
18282 return SDValue();
18283
18284 SDValue Val = ST->getValue();
18285 SDLoc DL(ST);
18286
18287 // Match OR operand.
18288 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18289 return SDValue();
18290
18291 // Match SHL operand and get Lower and Higher parts of Val.
18292 SDValue Op1 = Val.getOperand(0);
18293 SDValue Op2 = Val.getOperand(1);
18294 SDValue Lo, Hi;
18295 if (Op1.getOpcode() != ISD::SHL) {
18296 std::swap(Op1, Op2);
18297 if (Op1.getOpcode() != ISD::SHL)
18298 return SDValue();
18299 }
18300 Lo = Op2;
18301 Hi = Op1.getOperand(0);
18302 if (!Op1.hasOneUse())
18303 return SDValue();
18304
18305 // Match shift amount to HalfValBitSize.
18306 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18307 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18308 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18309 return SDValue();
18310
18311 // Lo and Hi are zero-extended from an integer type whose size is at most
18312 // half the stored value's size (e.g. at most i32 when storing an i64).
18313 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18314 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18315 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18316 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18317 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18318 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18319 return SDValue();
18320
18321 // Use the EVT of low and high parts before bitcast as the input
18322 // of target query.
18323 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18324 ? Lo.getOperand(0).getValueType()
18325 : Lo.getValueType();
18326 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18327 ? Hi.getOperand(0).getValueType()
18328 : Hi.getValueType();
18329 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18330 return SDValue();
18331
18332 // Start to split store.
18333 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18334 AAMDNodes AAInfo = ST->getAAInfo();
18335
18336 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18337 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18338 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18339 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18340
18341 SDValue Chain = ST->getChain();
18342 SDValue Ptr = ST->getBasePtr();
18343 // Lower value store.
18344 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18345 ST->getOriginalAlign(), MMOFlags, AAInfo);
18346 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18347 // Higher value store.
18348 SDValue St1 = DAG.getStore(
18349 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18350 ST->getOriginalAlign(), MMOFlags, AAInfo);
18351 return St1;
18352}
18353
18354/// Convert a disguised subvector insertion into a shuffle:
18355SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18356 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18357 "Expected insert_vector_elt");
18358 SDValue InsertVal = N->getOperand(1);
18359 SDValue Vec = N->getOperand(0);
18360
18361 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18362 // InsIndex)
18363 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18364 // CONCAT_VECTORS.
18365 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18366 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18367 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18368 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18369 ArrayRef<int> Mask = SVN->getMask();
18370
18371 SDValue X = Vec.getOperand(0);
18372 SDValue Y = Vec.getOperand(1);
18373
18374 // Vec's operand 0 is using indices from 0 to N-1 and
18375 // operand 1 from N to 2N - 1, where N is the number of
18376 // elements in the vectors.
18377 SDValue InsertVal0 = InsertVal.getOperand(0);
18378 int ElementOffset = -1;
18379
18380 // We explore the inputs of the shuffle in order to see if we find the
18381 // source of the extract_vector_elt. If so, we can use it to modify the
18382 // shuffle rather than perform an insert_vector_elt.
18383 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18384 ArgWorkList.emplace_back(Mask.size(), Y);
18385 ArgWorkList.emplace_back(0, X);
18386
18387 while (!ArgWorkList.empty()) {
18388 int ArgOffset;
18389 SDValue ArgVal;
18390 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18391
18392 if (ArgVal == InsertVal0) {
18393 ElementOffset = ArgOffset;
18394 break;
18395 }
18396
18397 // Peek through concat_vector.
18398 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18399 int CurrentArgOffset =
18400 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18401 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18402 for (SDValue Op : reverse(ArgVal->ops())) {
18403 CurrentArgOffset -= Step;
18404 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18405 }
18406
18407 // Make sure we went through all the elements and did not screw up index
18408 // computation.
18409 assert(CurrentArgOffset == ArgOffset);
18410 }
18411 }
18412
18413 if (ElementOffset != -1) {
18414 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18415
18416 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18417 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18418 assert(NewMask[InsIndex] <
18419 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18420 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18421
18422 SDValue LegalShuffle =
18423 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18424 Y, NewMask, DAG);
18425 if (LegalShuffle)
18426 return LegalShuffle;
18427 }
18428 }
18429
18430 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18431 // bitcast(shuffle (bitcast V), (extended X), Mask)
18432 // Note: We do not use an insert_subvector node because that requires a
18433 // legal subvector type.
18434 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18435 !InsertVal.getOperand(0).getValueType().isVector())
18436 return SDValue();
18437
18438 SDValue SubVec = InsertVal.getOperand(0);
18439 SDValue DestVec = N->getOperand(0);
18440 EVT SubVecVT = SubVec.getValueType();
18441 EVT VT = DestVec.getValueType();
18442 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18443 // If the source only has a single vector element, the cost of creating and
18444 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
18445 if (NumSrcElts == 1)
18446 return SDValue();
18447 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18448 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18449
18450 // Step 1: Create a shuffle mask that implements this insert operation. The
18451 // vector that we are inserting into will be operand 0 of the shuffle, so
18452 // those elements are just 'i'. The inserted subvector is in the first
18453 // positions of operand 1 of the shuffle. Example:
18454 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18455 SmallVector<int, 16> Mask(NumMaskVals);
18456 for (unsigned i = 0; i != NumMaskVals; ++i) {
18457 if (i / NumSrcElts == InsIndex)
18458 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18459 else
18460 Mask[i] = i;
18461 }
18462
18463 // Bail out if the target can not handle the shuffle we want to create.
18464 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18465 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18466 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18467 return SDValue();
18468
18469 // Step 2: Create a wide vector from the inserted source vector by appending
18470 // undefined elements. This is the same size as our destination vector.
18471 SDLoc DL(N);
18472 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18473 ConcatOps[0] = SubVec;
18474 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18475
18476 // Step 3: Shuffle in the padded subvector.
18477 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18478 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18479 AddToWorklist(PaddedSubV.getNode());
18480 AddToWorklist(DestVecBC.getNode());
18481 AddToWorklist(Shuf.getNode());
18482 return DAG.getBitcast(VT, Shuf);
18483}
18484
18485SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18486 SDValue InVec = N->getOperand(0);
18487 SDValue InVal = N->getOperand(1);
18488 SDValue EltNo = N->getOperand(2);
18489 SDLoc DL(N);
18490
18491 EVT VT = InVec.getValueType();
18492 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18493
18494 // Inserting into an out-of-bounds element is undefined.
18495 if (IndexC && VT.isFixedLengthVector() &&
18496 IndexC->getZExtValue() >= VT.getVectorNumElements())
18497 return DAG.getUNDEF(VT);
18498
18499 // Remove redundant insertions:
18500 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18501 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18502 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18503 return InVec;
18504
18505 if (!IndexC) {
18506 // If this is a variable insert into an undef vector, it might be better to splat:
18507 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18508 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18509 if (VT.isScalableVector())
18510 return DAG.getSplatVector(VT, DL, InVal);
18511 else {
18512 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18513 return DAG.getBuildVector(VT, DL, Ops);
18514 }
18515 }
18516 return SDValue();
18517 }
18518
18519 if (VT.isScalableVector())
18520 return SDValue();
18521
18522 unsigned NumElts = VT.getVectorNumElements();
18523
18524 // We must know which element is being inserted for folds below here.
18525 unsigned Elt = IndexC->getZExtValue();
18526 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18527 return Shuf;
18528
18529 // Canonicalize insert_vector_elt dag nodes.
18530 // Example:
18531 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18532 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18533 //
18534 // Do this only if the child insert_vector node has one use; also
18535 // do this only if indices are both constants and Idx1 < Idx0.
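// For example, (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
// -> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3).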
18536 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18537 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18538 unsigned OtherElt = InVec.getConstantOperandVal(2);
18539 if (Elt < OtherElt) {
18540 // Swap nodes.
18541 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18542 InVec.getOperand(0), InVal, EltNo);
18543 AddToWorklist(NewOp.getNode());
18544 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18545 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18546 }
18547 }
18548
18549 // If we can't generate a legal BUILD_VECTOR, exit
18550 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18551 return SDValue();
18552
18553 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18554 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18555 // vector elements.
18556 SmallVector<SDValue, 8> Ops;
18557 // Do not combine these two vectors if the output vector will not replace
18558 // the input vector.
18559 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18560 Ops.append(InVec.getNode()->op_begin(),
18561 InVec.getNode()->op_end());
18562 } else if (InVec.isUndef()) {
18563 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18564 } else {
18565 return SDValue();
18566 }
18567 assert(Ops.size() == NumElts && "Unexpected vector size")((void)0);
18568
18569 // Insert the element
18570 if (Elt < Ops.size()) {
18571 // All the operands of BUILD_VECTOR must have the same type;
18572 // we enforce that here.
18573 EVT OpVT = Ops[0].getValueType();
18574 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18575 }
18576
18577 // Return the new vector
18578 return DAG.getBuildVector(VT, DL, Ops);
18579}
18580
18581SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18582 SDValue EltNo,
18583 LoadSDNode *OriginalLoad) {
18584 assert(OriginalLoad->isSimple())((void)0);
18585
18586 EVT ResultVT = EVE->getValueType(0);
18587 EVT VecEltVT = InVecVT.getVectorElementType();
18588
18589 // If the vector element type is not a multiple of a byte then we are unable
18590 // to correctly compute an address to load only the extracted element as a
18591 // scalar.
18592 if (!VecEltVT.isByteSized())
18593 return SDValue();
18594
18595 Align Alignment = OriginalLoad->getAlign();
18596 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18597 VecEltVT.getTypeForEVT(*DAG.getContext()));
18598
18599 if (NewAlign > Alignment ||
18600 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18601 return SDValue();
18602
18603 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18604 ISD::NON_EXTLOAD : ISD::EXTLOAD;
18605 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18606 return SDValue();
18607
18608 Alignment = NewAlign;
18609
18610 MachinePointerInfo MPI;
18611 SDLoc DL(EVE);
18612 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18613 int Elt = ConstEltNo->getZExtValue();
18614 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18615 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18616 } else {
18617 // Discard the pointer info except the address space because the memory
18618 // operand can't represent this new access since the offset is variable.
18619 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18620 }
18621 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18622 InVecVT, EltNo);
18623
18624 // The replacement we need to do here is a little tricky: we need to
18625 // replace an extractelement of a load with a load.
18626 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18627 // Note that this replacement assumes that the extractelement is the only
18628 // use of the load; that's okay because we don't want to perform this
18629 // transformation in other cases anyway.
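// Concretely, both results are rewired at once: the extracted value
// (EVE, 0) is replaced by the new scalar load and the old load's chain
// (OriginalLoad, 1) by the new load's chain, so chain users stay valid.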
18630 SDValue Load;
18631 SDValue Chain;
18632 if (ResultVT.bitsGT(VecEltVT)) {
18633 // If the result type of vextract is wider than the load, then issue an
18634 // extending load instead.
18635 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18636 VecEltVT)
18637 ? ISD::ZEXTLOAD
18638 : ISD::EXTLOAD;
18639 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18640 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18641 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18642 OriginalLoad->getAAInfo());
18643 Chain = Load.getValue(1);
18644 } else {
18645 Load = DAG.getLoad(
18646 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18647 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18648 Chain = Load.getValue(1);
18649 if (ResultVT.bitsLT(VecEltVT))
18650 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18651 else
18652 Load = DAG.getBitcast(ResultVT, Load);
18653 }
18654 WorklistRemover DeadNodes(*this);
18655 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18656 SDValue To[] = { Load, Chain };
18657 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18658 // Make sure to revisit this node to clean it up; it will usually be dead.
18659 AddToWorklist(EVE);
18660 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18661 // worklist explicitly as well.
18662 AddToWorklistWithUsers(Load.getNode());
18663 ++OpsNarrowed;
18664 return SDValue(EVE, 0);
18665}
18666
18667/// Transform a vector binary operation into a scalar binary operation by moving
18668/// the math/logic after an extract element of a vector.
18669static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18670 bool LegalOperations) {
18671 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18672 SDValue Vec = ExtElt->getOperand(0);
18673 SDValue Index = ExtElt->getOperand(1);
18674 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18675 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18676 Vec.getNode()->getNumValues() != 1)
18677 return SDValue();
18678
18679 // Targets may want to avoid this to prevent an expensive register transfer.
18680 if (!TLI.shouldScalarizeBinop(Vec))
18681 return SDValue();
18682
18683 // Extracting an element of a vector constant is constant-folded, so this
18684 // transform is just replacing a vector op with a scalar op while moving the
18685 // extract.
18686 SDValue Op0 = Vec.getOperand(0);
18687 SDValue Op1 = Vec.getOperand(1);
18688 if (isAnyConstantBuildVector(Op0, true) ||
18689 isAnyConstantBuildVector(Op1, true)) {
18690 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18691 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
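// For example, extractelt (add X, <1,2,3,4>), 2
// --> add (extractelt X, 2), (extractelt <1,2,3,4>, 2),
// and the second extract constant-folds to 3.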
18692 SDLoc DL(ExtElt);
18693 EVT VT = ExtElt->getValueType(0);
18694 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18695 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18696 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18697 }
18698
18699 return SDValue();
18700}
18701
18702SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18703 SDValue VecOp = N->getOperand(0);
18704 SDValue Index = N->getOperand(1);
18705 EVT ScalarVT = N->getValueType(0);
18706 EVT VecVT = VecOp.getValueType();
18707 if (VecOp.isUndef())
18708 return DAG.getUNDEF(ScalarVT);
18709
18710 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18711 //
18712 // This only really matters if the index is non-constant since other combines
18713 // on the constant elements already work.
18714 SDLoc DL(N);
18715 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18716 Index == VecOp.getOperand(2)) {
18717 SDValue Elt = VecOp.getOperand(1);
18718 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18719 }
18720
18721 // (vextract (scalar_to_vector val), 0) -> val
18722 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18723 // Only 0'th element of SCALAR_TO_VECTOR is defined.
18724 if (DAG.isKnownNeverZero(Index))
18725 return DAG.getUNDEF(ScalarVT);
18726
18727 // Check if the result type doesn't match the inserted element type. A
18728 // SCALAR_TO_VECTOR may truncate the inserted element and the
18729 // EXTRACT_VECTOR_ELT may widen the extracted vector.
18730 SDValue InOp = VecOp.getOperand(0);
18731 if (InOp.getValueType() != ScalarVT) {
18732 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger())((void)0);
18733 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18734 }
18735 return InOp;
18736 }
18737
18738 // extract_vector_elt of out-of-bounds element -> UNDEF
18739 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18740 if (IndexC && VecVT.isFixedLengthVector() &&
18741 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18742 return DAG.getUNDEF(ScalarVT);
18743
18744 // extract_vector_elt (build_vector x, y), 1 -> y
18745 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18746 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18747 TLI.isTypeLegal(VecVT) &&
18748 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18749 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||((void)0)
18750 VecVT.isFixedLengthVector()) &&((void)0)
18751 "BUILD_VECTOR used for scalable vectors")((void)0);
18752 unsigned IndexVal =
18753 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18754 SDValue Elt = VecOp.getOperand(IndexVal);
18755 EVT InEltVT = Elt.getValueType();
18756
18757 // Sometimes build_vector's scalar input types do not match result type.
18758 if (ScalarVT == InEltVT)
18759 return Elt;
18760
18761 // TODO: It may be useful to truncate if the truncation is free and the
18762 // build_vector implicitly converts.
18763 }
18764
18765 if (VecVT.isScalableVector())
18766 return SDValue();
18767
18768 // All the code from this point onwards assumes fixed width vectors, but it's
18769 // possible that some of the combinations could be made to work for scalable
18770 // vectors too.
18771 unsigned NumElts = VecVT.getVectorNumElements();
18772 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18773
18774 // TODO: These transforms should not require the 'hasOneUse' restriction, but
18775 // there are regressions on multiple targets without it. We can end up with a
18776 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18777 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18778 VecOp.hasOneUse()) {
18779 // The vector index of the LSBs of the source depends on the endianness.
18780 bool IsLE = DAG.getDataLayout().isLittleEndian();
18781 unsigned ExtractIndex = IndexC->getZExtValue();
18782 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18783 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18784 SDValue BCSrc = VecOp.getOperand(0);
18785 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18786 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18787
18788 if (LegalTypes && BCSrc.getValueType().isInteger() &&
18789 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18790 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18791 // trunc i64 X to i32
18792 SDValue X = BCSrc.getOperand(0);
18793 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&((void)0)
18794 "Extract element and scalar to vector can't change element type "((void)0)
18795 "from FP to integer.")((void)0);
18796 unsigned XBitWidth = X.getValueSizeInBits();
18797 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18798
18799 // An extract element return value type can be wider than its vector
18800 // operand element type. In that case, the high bits are undefined, so
18801 // it's possible that we may need to extend rather than truncate.
18802 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18803 assert(XBitWidth % VecEltBitWidth == 0 &&((void)0)
18804 "Scalar bitwidth must be a multiple of vector element bitwidth")((void)0);
18805 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18806 }
18807 }
18808 }
18809
18810 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18811 return BO;
18812
18813 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18814 // We only perform this optimization before the op legalization phase because
18815 // we may introduce new vector instructions which are not backed by TD
18816 // patterns (for example, on AVX, extracting elements from a wide vector
18817 // without using extract_subvector). However, if we can find an underlying
18818 // scalar value, then we can always use that.
18819 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18820 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18821 // Find the new index to extract from.
18822 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18823
18824 // Extracting an undef index is undef.
18825 if (OrigElt == -1)
18826 return DAG.getUNDEF(ScalarVT);
18827
18828 // Select the right vector half to extract from.
18829 SDValue SVInVec;
18830 if (OrigElt < (int)NumElts) {
18831 SVInVec = VecOp.getOperand(0);
18832 } else {
18833 SVInVec = VecOp.getOperand(1);
18834 OrigElt -= NumElts;
18835 }
18836
18837 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18838 SDValue InOp = SVInVec.getOperand(OrigElt);
18839 if (InOp.getValueType() != ScalarVT) {
18840 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger())((void)0);
18841 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18842 }
18843
18844 return InOp;
18845 }
18846
18847 // FIXME: We should handle recursing on other vector shuffles and
18848 // scalar_to_vector here as well.
18849
18850 if (!LegalOperations ||
18851 // FIXME: Should really be just isOperationLegalOrCustom.
18852 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18853 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18854 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18855 DAG.getVectorIdxConstant(OrigElt, DL));
18856 }
18857 }
18858
18859 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18860 // simplify it based on the (valid) extraction indices.
18861 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18862 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18863 Use->getOperand(0) == VecOp &&
18864 isa<ConstantSDNode>(Use->getOperand(1));
18865 })) {
18866 APInt DemandedElts = APInt::getNullValue(NumElts);
18867 for (SDNode *Use : VecOp->uses()) {
18868 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18869 if (CstElt->getAPIntValue().ult(NumElts))
18870 DemandedElts.setBit(CstElt->getZExtValue());
18871 }
18872 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18873 // We simplified the vector operand of this extract element. If this
18874 // extract is not dead, visit it again so it is folded properly.
18875 if (N->getOpcode() != ISD::DELETED_NODE)
18876 AddToWorklist(N);
18877 return SDValue(N, 0);
18878 }
18879 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18880 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18881 // We simplified the vector operand of this extract element. If this
18882 // extract is not dead, visit it again so it is folded properly.
18883 if (N->getOpcode() != ISD::DELETED_NODE)
18884 AddToWorklist(N);
18885 return SDValue(N, 0);
18886 }
18887 }
18888
18889 // Everything under here is trying to match an extract of a loaded value.
18890 // If the result of the load has to be truncated, then it's not necessarily
18891 // profitable.
18892 bool BCNumEltsChanged = false;
18893 EVT ExtVT = VecVT.getVectorElementType();
18894 EVT LVT = ExtVT;
18895 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18896 return SDValue();
18897
18898 if (VecOp.getOpcode() == ISD::BITCAST) {
18899 // Don't duplicate a load with other uses.
18900 if (!VecOp.hasOneUse())
18901 return SDValue();
18902
18903 EVT BCVT = VecOp.getOperand(0).getValueType();
18904 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18905 return SDValue();
18906 if (NumElts != BCVT.getVectorNumElements())
18907 BCNumEltsChanged = true;
18908 VecOp = VecOp.getOperand(0);
18909 ExtVT = BCVT.getVectorElementType();
18910 }
18911
18912 // extract (vector load $addr), i --> load $addr + i * size
18913 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18914 ISD::isNormalLoad(VecOp.getNode()) &&
18915 !Index->hasPredecessor(VecOp.getNode())) {
18916 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18917 if (VecLoad && VecLoad->isSimple())
18918 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18919 }
18920
18921 // Perform only after legalization to ensure build_vector / vector_shuffle
18922 // optimizations have already been done.
18923 if (!LegalOperations || !IndexC)
18924 return SDValue();
18925
18926 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18927 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18928 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18929 int Elt = IndexC->getZExtValue();
18930 LoadSDNode *LN0 = nullptr;
18931 if (ISD::isNormalLoad(VecOp.getNode())) {
18932 LN0 = cast<LoadSDNode>(VecOp);
18933 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18934 VecOp.getOperand(0).getValueType() == ExtVT &&
18935 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18936 // Don't duplicate a load with other uses.
18937 if (!VecOp.hasOneUse())
18938 return SDValue();
18939
18940 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18941 }
18942 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18943 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18944 // =>
18945 // (load $addr+1*size)
18946
18947 // Don't duplicate a load with other uses.
18948 if (!VecOp.hasOneUse())
18949 return SDValue();
18950
18951 // If the bit convert changed the number of elements, it is unsafe
18952 // to examine the mask.
18953 if (BCNumEltsChanged)
18954 return SDValue();
18955
18956 // Select the input vector, guarding against an out-of-range extract index.
18957 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18958 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18959
18960 if (VecOp.getOpcode() == ISD::BITCAST) {
18961 // Don't duplicate a load with other uses.
18962 if (!VecOp.hasOneUse())
18963 return SDValue();
18964
18965 VecOp = VecOp.getOperand(0);
18966 }
18967 if (ISD::isNormalLoad(VecOp.getNode())) {
18968 LN0 = cast<LoadSDNode>(VecOp);
18969 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18970 Index = DAG.getConstant(Elt, DL, Index.getValueType());
18971 }
18972 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18973 VecVT.getVectorElementType() == ScalarVT &&
18974 (!LegalTypes ||
18975 TLI.isTypeLegal(
18976 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18977 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18978 // -> extract_vector_elt a, 0
18979 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18980 // -> extract_vector_elt a, 1
18981 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18982 // -> extract_vector_elt b, 0
18983 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18984 // -> extract_vector_elt b, 1
18985 SDLoc SL(N);
18986 EVT ConcatVT = VecOp.getOperand(0).getValueType();
18987 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18988 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18989 Index.getValueType());
18990
18991 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18992 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18993 ConcatVT.getVectorElementType(),
18994 ConcatOp, NewIdx);
18995 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18996 }
18997
18998 // Make sure we found a non-volatile load and the extractelement is
18999 // the only use.
19000 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19001 return SDValue();
19002
19003 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19004 if (Elt == -1)
19005 return DAG.getUNDEF(LVT);
19006
19007 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19008}
19009
19010// Simplify (build_vec (ext )) to (bitcast (build_vec ))
19011SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19012 // We perform this optimization post type-legalization because
19013 // the type-legalizer often scalarizes integer-promoted vectors.
19014 // Performing this optimization before may create bit-casts which
19015 // will be type-legalized to complex code sequences.
19016 // We perform this optimization only before the operation legalizer because we
19017 // may introduce illegal operations.
19018 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19019 return SDValue();
19020
19021 unsigned NumInScalars = N->getNumOperands();
19022 SDLoc DL(N);
19023 EVT VT = N->getValueType(0);
19024
19025 // Check to see if this is a BUILD_VECTOR of a bunch of values
19026 // which come from any_extend or zero_extend nodes. If so, we can create
19027 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19028 // optimizations. We do not handle sign-extend because we can't fill the sign
19029 // using shuffles.
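// For example, on a little-endian target,
// (v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d))
// can be rebuilt as (bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0)).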
19030 EVT SourceType = MVT::Other;
19031 bool AllAnyExt = true;
19032
19033 for (unsigned i = 0; i != NumInScalars; ++i) {
19034 SDValue In = N->getOperand(i);
19035 // Ignore undef inputs.
19036 if (In.isUndef()) continue;
19037
19038 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19039 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19040
19041 // Abort if the element is not an extension.
19042 if (!ZeroExt && !AnyExt) {
19043 SourceType = MVT::Other;
19044 break;
19045 }
19046
19047 // The input is a ZeroExt or AnyExt. Check the original type.
19048 EVT InTy = In.getOperand(0).getValueType();
19049
19050 // Check that all of the widened source types are the same.
19051 if (SourceType == MVT::Other)
19052 // First time.
19053 SourceType = InTy;
19054 else if (InTy != SourceType) {
19055 // Multiple incoming types. Abort.
19056 SourceType = MVT::Other;
19057 break;
19058 }
19059
19060 // Check if all of the extends are ANY_EXTENDs.
19061 AllAnyExt &= AnyExt;
19062 }
19063
19064 // In order to have valid types, all of the inputs must be extended from the
19065 // same source type and all of the inputs must be any or zero extend.
19066 // Scalar sizes must be a power of two.
19067 EVT OutScalarTy = VT.getScalarType();
19068 bool ValidTypes = SourceType != MVT::Other &&
19069 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19070 isPowerOf2_32(SourceType.getSizeInBits());
19071
19072 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19073 // turn into a single shuffle instruction.
19074 if (!ValidTypes)
19075 return SDValue();
19076
19077 // If we already have a splat buildvector, then don't fold it if it means
19078 // introducing zeros.
19079 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19080 return SDValue();
19081
19082 bool isLE = DAG.getDataLayout().isLittleEndian();
19083 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19084 assert(ElemRatio > 1 && "Invalid element size ratio")((void)0);
19085 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19086 DAG.getConstant(0, DL, SourceType);
19087
19088 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19089 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19090
19091 // Populate the new build_vector
19092 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19093 SDValue Cast = N->getOperand(i);
19094 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||((void)0)
19095 Cast.getOpcode() == ISD::ZERO_EXTEND ||((void)0)
19096 Cast.isUndef()) && "Invalid cast opcode")((void)0);
19097 SDValue In;
19098 if (Cast.isUndef())
19099 In = DAG.getUNDEF(SourceType);
19100 else
19101 In = Cast->getOperand(0);
19102 unsigned Index = isLE ? (i * ElemRatio) :
19103 (i * ElemRatio + (ElemRatio - 1));
19104
19105 assert(Index < Ops.size() && "Invalid index")((void)0);
19106 Ops[Index] = In;
19107 }
19108
19109 // The type of the new BUILD_VECTOR node.
19110 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19111 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&((void)0)
19112 "Invalid vector size")((void)0);
19113 // Check if the new vector type is legal.
19114 if (!isTypeLegal(VecVT) ||
19115 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19116 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19117 return SDValue();
19118
19119 // Make the new BUILD_VECTOR.
19120 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19121
19122 // The new BUILD_VECTOR node has the potential to be further optimized.
19123 AddToWorklist(BV.getNode());
19124 // Bitcast to the desired type.
19125 return DAG.getBitcast(VT, BV);
19126}
19127
19128// Simplify (build_vec (trunc $1)
19129// (trunc (srl $1 half-width))
19130// (trunc (srl $1 (2 * half-width))) …)
19131// to (bitcast $1)
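// For example, with a little-endian i64 $1,
// (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
// (trunc (srl $1, 32)), (trunc (srl $1, 48)))
// becomes (v4i16 (bitcast $1)).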
19132SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19133 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector")((void)0);
19134
19135 // Only for little endian
19136 if (!DAG.getDataLayout().isLittleEndian())
19137 return SDValue();
19138
19139 SDLoc DL(N);
19140 EVT VT = N->getValueType(0);
19141 EVT OutScalarTy = VT.getScalarType();
19142 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19143
19144 // Only for power-of-two types, to be sure that the bitcast works well
19145 if (!isPowerOf2_64(ScalarTypeBitsize))
19146 return SDValue();
19147
19148 unsigned NumInScalars = N->getNumOperands();
19149
19150 // Look through bitcasts
19151 auto PeekThroughBitcast = [](SDValue Op) {
19152 if (Op.getOpcode() == ISD::BITCAST)
19153 return Op.getOperand(0);
19154 return Op;
19155 };
19156
19157 // The source value where all the parts are extracted.
19158 SDValue Src;
19159 for (unsigned i = 0; i != NumInScalars; ++i) {
19160 SDValue In = PeekThroughBitcast(N->getOperand(i));
19161 // Ignore undef inputs.
19162 if (In.isUndef()) continue;
19163
19164 if (In.getOpcode() != ISD::TRUNCATE)
19165 return SDValue();
19166
19167 In = PeekThroughBitcast(In.getOperand(0));
19168
19169 if (In.getOpcode() != ISD::SRL) {
19170 // For now, only handle build_vec without shuffling; handle shifts here in
19171 // the future.
19172 if (i != 0)
19173 return SDValue();
19174
19175 Src = In;
19176 } else {
19177 // In is SRL
19178 SDValue part = PeekThroughBitcast(In.getOperand(0));
19179
19180 if (!Src) {
19181 Src = part;
19182 } else if (Src != part) {
19183 // Vector parts do not stem from the same variable
19184 return SDValue();
19185 }
19186
19187 SDValue ShiftAmtVal = In.getOperand(1);
19188 if (!isa<ConstantSDNode>(ShiftAmtVal))
19189 return SDValue();
19190
19191 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19192
19193 // The extracted value is not extracted at the right position
19194 if (ShiftAmt != i * ScalarTypeBitsize)
19195 return SDValue();
19196 }
19197 }
19198
19199 // Only cast if the size is the same
19200 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19201 return SDValue();
19202
19203 return DAG.getBitcast(VT, Src);
19204}
19205
19206SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19207 ArrayRef<int> VectorMask,
19208 SDValue VecIn1, SDValue VecIn2,
19209 unsigned LeftIdx, bool DidSplitVec) {
19210 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19211
19212 EVT VT = N->getValueType(0);
19213 EVT InVT1 = VecIn1.getValueType();
19214 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19215
19216 unsigned NumElems = VT.getVectorNumElements();
19217 unsigned ShuffleNumElems = NumElems;
19218
19219 // If we artificially split a vector in two already, then the offsets in the
19220 // operands will all be based off of VecIn1, even those in VecIn2.
19221 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19222
19223 uint64_t VTSize = VT.getFixedSizeInBits();
19224 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19225 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19226
19227 assert(InVT2Size <= InVT1Size &&((void)0)
19228 "Inputs must be sorted to be in non-increasing vector size order.")((void)0);
19229
19230 // We can't generate a shuffle node with mismatched input and output types.
19231 // Try to make the types match the type of the output.
19232 if (InVT1 != VT || InVT2 != VT) {
19233 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19234 // If the output vector length is a multiple of both input lengths,
19235 // we can concatenate them and pad the rest with undefs.
19236 unsigned NumConcats = VTSize / InVT1Size;
19237 assert(NumConcats >= 2 && "Concat needs at least two inputs!")((void)0);
19238 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19239 ConcatOps[0] = VecIn1;
19240 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19241 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19242 VecIn2 = SDValue();
19243 } else if (InVT1Size == VTSize * 2) {
19244 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19245 return SDValue();
19246
19247 if (!VecIn2.getNode()) {
19248 // If we only have one input vector, and it's twice the size of the
19249 // output, split it in two.
19250 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19251 DAG.getVectorIdxConstant(NumElems, DL));
19252 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19253 // Since we now have shorter input vectors, adjust the offset of the
19254 // second vector's start.
19255 Vec2Offset = NumElems;
19256 } else {
19257 assert(InVT2Size <= InVT1Size &&((void)0)
19258 "Second input is not going to be larger than the first one.")((void)0);
19259
19260 // VecIn1 is wider than the output, and we have another, possibly
19261 // smaller input. Pad the smaller input with undefs, shuffle at the
19262 // input vector width, and extract the output.
19263 // The shuffle type is different than VT, so check legality again.
19264 if (LegalOperations &&
19265 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19266 return SDValue();
19267
19268 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19269 // lower it back into a BUILD_VECTOR. So if the inserted type is
19270 // illegal, don't even try.
19271 if (InVT1 != InVT2) {
19272 if (!TLI.isTypeLegal(InVT2))
19273 return SDValue();
19274 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19275 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19276 }
19277 ShuffleNumElems = NumElems * 2;
19278 }
19279 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19280 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19281 ConcatOps[0] = VecIn2;
19282 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19283 } else {
19284 // TODO: Support cases where the length mismatch isn't exactly by a
19285 // factor of 2.
19286 // TODO: Move this check upwards, so that if we have bad type
19287 // mismatches, we don't create any DAG nodes.
19288 return SDValue();
19289 }
19290 }
19291
19292 // Initialize mask to undef.
19293 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19294
19295 // Only need to run up to the number of elements actually used, not the
19296 // total number of elements in the shuffle - if we are shuffling a wider
19297 // vector, the high lanes should be set to undef.
19298 for (unsigned i = 0; i != NumElems; ++i) {
19299 if (VectorMask[i] <= 0)
19300 continue;
19301
19302 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19303 if (VectorMask[i] == (int)LeftIdx) {
19304 Mask[i] = ExtIndex;
19305 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19306 Mask[i] = Vec2Offset + ExtIndex;
19307 }
19308 }
19309
19310 // The types of the input vectors may have changed above.
19311 InVT1 = VecIn1.getValueType();
19312
19313 // If we already have a VecIn2, it should have the same type as VecIn1.
19314 // If we don't, get an undef/zero vector of the appropriate type.
19315 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19316 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.")((void)0);
19317
19318 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19319 if (ShuffleNumElems > NumElems)
19320 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19321
19322 return Shuffle;
19323}
19324
19325static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19326 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector")((void)0);
19327
19328 // First, determine where the build vector is not undef.
19329 // TODO: We could extend this to handle zero elements as well as undefs.
19330 int NumBVOps = BV->getNumOperands();
19331 int ZextElt = -1;
19332 for (int i = 0; i != NumBVOps; ++i) {
19333 SDValue Op = BV->getOperand(i);
19334 if (Op.isUndef())
19335 continue;
19336 if (ZextElt == -1)
19337 ZextElt = i;
19338 else
19339 return SDValue();
19340 }
19341 // Bail out if there's no non-undef element.
19342 if (ZextElt == -1)
19343 return SDValue();
19344
19345 // The build vector contains some number of undef elements and exactly
19346 // one other element. That other element must be a zero-extended scalar
19347 // extracted from a vector at a constant index to turn this into a shuffle.
19348 // Also, require that the build vector does not implicitly truncate/extend
19349 // its elements.
19350 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19351 EVT VT = BV->getValueType(0);
19352 SDValue Zext = BV->getOperand(ZextElt);
19353 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19354 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19355 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19356 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19357 return SDValue();
19358
19359 // The zero-extend must be a multiple of the source size, and we must be
19360 // building a vector of the same size as the source of the extract element.
19361 SDValue Extract = Zext.getOperand(0);
19362 unsigned DestSize = Zext.getValueSizeInBits();
19363 unsigned SrcSize = Extract.getValueSizeInBits();
19364 if (DestSize % SrcSize != 0 ||
19365 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19366 return SDValue();
19367
19368 // Create a shuffle mask that will combine the extracted element with zeros
19369 // and undefs.
19370 int ZextRatio = DestSize / SrcSize;
19371 int NumMaskElts = NumBVOps * ZextRatio;
19372 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19373 for (int i = 0; i != NumMaskElts; ++i) {
19374 if (i / ZextRatio == ZextElt) {
19375 // The low bits of the (potentially translated) extracted element map to
19376 // the source vector. The high bits map to zero. We will use a zero vector
19377 // as the 2nd source operand of the shuffle, so use the 1st element of
19378 // that vector (mask value is number-of-elements) for the high bits.
19379 if (i % ZextRatio == 0)
19380 ShufMask[i] = Extract.getConstantOperandVal(1);
19381 else
19382 ShufMask[i] = NumMaskElts;
19383 }
19384
19385 // Undef elements of the build vector remain undef because we initialize
19386 // the shuffle mask with -1.
19387 }
19388
19389 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19390 // bitcast (shuffle V, ZeroVec, VectorMask)
19391 SDLoc DL(BV);
19392 EVT VecVT = Extract.getOperand(0).getValueType();
19393 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19394 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19395 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19396 ZeroVec, ShufMask, DAG);
19397 if (!Shuf)
19398 return SDValue();
19399 return DAG.getBitcast(VT, Shuf);
19400}
19401
19402// FIXME: promote to STLExtras.
19403template <typename R, typename T>
19404static auto getFirstIndexOf(R &&Range, const T &Val) {
19405 auto I = find(Range, Val);
19406 if (I == Range.end())
19407 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19408 return std::distance(Range.begin(), I);
19409}
19410
19411// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19412// operations. If the types of the vectors we're extracting from allow it,
19413// turn this into a vector_shuffle node.
19414SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19415 SDLoc DL(N);
19416 EVT VT = N->getValueType(0);
19417
19418 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19419 if (!isTypeLegal(VT))
19420 return SDValue();
19421
19422 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19423 return V;
19424
19425 // May only combine to shuffle after legalize if shuffle is legal.
19426 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19427 return SDValue();
19428
19429 bool UsesZeroVector = false;
19430 unsigned NumElems = N->getNumOperands();
19431
19432 // Record, for each element of the newly built vector, which input vector
19433 // that element comes from. -1 stands for undef, 0 for the zero vector,
19434 // and positive values for the input vectors.
19435 // VectorMask maps each element to its vector number, and VecIn maps vector
19436 // numbers to their initial SDValues.
19437
19438 SmallVector<int, 8> VectorMask(NumElems, -1);
19439 SmallVector<SDValue, 8> VecIn;
19440 VecIn.push_back(SDValue());
19441
19442 for (unsigned i = 0; i != NumElems; ++i) {
19443 SDValue Op = N->getOperand(i);
19444
19445 if (Op.isUndef())
19446 continue;
19447
19448 // See if we can use a blend with a zero vector.
19449 // TODO: Should we generalize this to a blend with an arbitrary constant
19450 // vector?
19451 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19452 UsesZeroVector = true;
19453 VectorMask[i] = 0;
19454 continue;
19455 }
19456
19457 // Not an undef or zero. If the input is something other than an
19458 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19459 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19460 !isa<ConstantSDNode>(Op.getOperand(1)))
19461 return SDValue();
19462 SDValue ExtractedFromVec = Op.getOperand(0);
19463
19464 if (ExtractedFromVec.getValueType().isScalableVector())
19465 return SDValue();
19466
19467 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19468 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19469 return SDValue();
19470
19471 // All inputs must have the same element type as the output.
19472 if (VT.getVectorElementType() !=
19473 ExtractedFromVec.getValueType().getVectorElementType())
19474 return SDValue();
19475
19476 // Have we seen this input vector before?
19477 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19478 // a map back from SDValues to numbers isn't worth it.
19479 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19480 if (Idx == -1) { // A new source vector?
19481 Idx = VecIn.size();
19482 VecIn.push_back(ExtractedFromVec);
19483 }
19484
19485 VectorMask[i] = Idx;
19486 }
19487
19488 // If we didn't find at least one input vector, bail out.
19489 if (VecIn.size() < 2)
19490 return SDValue();
19491
19492 // If all the operands of the BUILD_VECTOR extract from the same
19493 // vector, then split the vector efficiently based on the maximum
19494 // vector access index and adjust the VectorMask and
19495 // VecIn accordingly.
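// For example, a v2i32 build_vector extracting lanes 1 and 5 of a single
// v16i32 source can be rewritten (assuming v4i32 is legal) to extract two
// v4i32 subvectors covering lanes 0-3 and 4-7 and shuffle those instead.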
19496 bool DidSplitVec = false;
19497 if (VecIn.size() == 2) {
19498 unsigned MaxIndex = 0;
19499 unsigned NearestPow2 = 0;
19500 SDValue Vec = VecIn.back();
19501 EVT InVT = Vec.getValueType();
19502 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19503
19504 for (unsigned i = 0; i < NumElems; i++) {
19505 if (VectorMask[i] <= 0)
19506 continue;
19507 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19508 IndexVec[i] = Index;
19509 MaxIndex = std::max(MaxIndex, Index);
19510 }
19511
19512 NearestPow2 = PowerOf2Ceil(MaxIndex);
19513 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19514 NumElems * 2 < NearestPow2) {
19515 unsigned SplitSize = NearestPow2 / 2;
19516 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19517 InVT.getVectorElementType(), SplitSize);
19518 if (TLI.isTypeLegal(SplitVT) &&
19519 SplitSize + SplitVT.getVectorNumElements() <=
19520 InVT.getVectorNumElements()) {
19521 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19522 DAG.getVectorIdxConstant(SplitSize, DL));
19523 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19524 DAG.getVectorIdxConstant(0, DL));
19525 VecIn.pop_back();
19526 VecIn.push_back(VecIn1);
19527 VecIn.push_back(VecIn2);
19528 DidSplitVec = true;
19529
19530 for (unsigned i = 0; i < NumElems; i++) {
19531 if (VectorMask[i] <= 0)
19532 continue;
19533 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19534 }
19535 }
19536 }
19537 }
19538
19539 // Sort input vectors by decreasing vector element count,
19540 // while preserving the relative order of equally-sized vectors.
19541 // Note that we keep the first "implicit" zero vector as-is.
19542 SmallVector<SDValue, 8> SortedVecIn(VecIn);
19543 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19544 [](const SDValue &a, const SDValue &b) {
19545 return a.getValueType().getVectorNumElements() >
19546 b.getValueType().getVectorNumElements();
19547 });
19548
19549 // We now also need to rebuild the VectorMask, because it referenced element
19550 // order in VecIn, and we just sorted them.
19551 for (int &SourceVectorIndex : VectorMask) {
19552 if (SourceVectorIndex <= 0)
19553 continue;
19554 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19555 assert(Idx > 0 && Idx < SortedVecIn.size() &&((void)0)
19556 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure")((void)0);
19557 SourceVectorIndex = Idx;
19558 }
19559
19560 VecIn = std::move(SortedVecIn);
19561
19562 // TODO: Should this fire if some of the input vectors have an illegal type (like
19563 // it does now), or should we let legalization run its course first?
19564
19565 // Shuffle phase:
19566 // Take pairs of vectors, and shuffle them so that the result has elements
19567 // from these vectors in the correct places.
19568 // For example, given:
19569 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19570 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19571 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19572 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19573 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19574 // We will generate:
19575 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19576 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19577 SmallVector<SDValue, 4> Shuffles;
19578 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19579 unsigned LeftIdx = 2 * In + 1;
19580 SDValue VecLeft = VecIn[LeftIdx];
19581 SDValue VecRight =
19582 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19583
19584 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19585 VecRight, LeftIdx, DidSplitVec))
19586 Shuffles.push_back(Shuffle);
19587 else
19588 return SDValue();
19589 }
19590
19591 // If we need the zero vector as an "ingredient" in the blend tree, add it
19592 // to the list of shuffles.
19593 if (UsesZeroVector)
19594 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19595 : DAG.getConstantFP(0.0, DL, VT));
19596
19597 // If we only have one shuffle, we're done.
19598 if (Shuffles.size() == 1)
19599 return Shuffles[0];
19600
19601 // Update the vector mask to point to the post-shuffle vectors.
19602 for (int &Vec : VectorMask)
19603 if (Vec == 0)
19604 Vec = Shuffles.size() - 1;
19605 else
19606 Vec = (Vec - 1) / 2;
19607
19608 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19609 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19610 // generate:
19611 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19612 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19613 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19614 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19615 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19616 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19617 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19618
19619 // Make sure the initial size of the shuffle list is even.
19620 if (Shuffles.size() % 2)
19621 Shuffles.push_back(DAG.getUNDEF(VT));
19622
19623 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19624 if (CurSize % 2) {
19625 Shuffles[CurSize] = DAG.getUNDEF(VT);
19626 CurSize++;
19627 }
19628 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19629 int Left = 2 * In;
19630 int Right = 2 * In + 1;
19631 SmallVector<int, 8> Mask(NumElems, -1);
19632 for (unsigned i = 0; i != NumElems; ++i) {
19633 if (VectorMask[i] == Left) {
19634 Mask[i] = i;
19635 VectorMask[i] = In;
19636 } else if (VectorMask[i] == Right) {
19637 Mask[i] = i + NumElems;
19638 VectorMask[i] = In;
19639 }
19640 }
19641
19642 Shuffles[In] =
19643 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19644 }
19645 }
19646 return Shuffles[0];
19647}
19648
19649 // Try to turn a build vector of zero extends of extract vector elts into
19650 // a vector zero extend and possibly an extract subvector.
19651// TODO: Support sign extend?
19652// TODO: Allow undef elements?
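// For example, (v4i32 build_vector (zext (extract_vector_elt (v8i16 X), 4)),
// ..., (zext (extract_vector_elt X, 7))) can become
// (zero_extend (v4i16 (extract_subvector X, 4))).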
19653SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19654 if (LegalOperations)
19655 return SDValue();
19656
19657 EVT VT = N->getValueType(0);
19658
19659 bool FoundZeroExtend = false;
19660 SDValue Op0 = N->getOperand(0);
19661 auto checkElem = [&](SDValue Op) -> int64_t {
19662 unsigned Opc = Op.getOpcode();
19663 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19664 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19665 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19666 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19667 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19668 return C->getZExtValue();
19669 return -1;
19670 };
19671
19672 // Make sure the first element matches
19673 // (zext (extract_vector_elt X, C))
19674 int64_t Offset = checkElem(Op0);
19675 if (Offset < 0)
19676 return SDValue();
19677
19678 unsigned NumElems = N->getNumOperands();
19679 SDValue In = Op0.getOperand(0).getOperand(0);
19680 EVT InSVT = In.getValueType().getScalarType();
19681 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19682
19683 // Don't create an illegal input type after type legalization.
19684 if (LegalTypes && !TLI.isTypeLegal(InVT))
19685 return SDValue();
19686
19687 // Ensure all the elements come from the same vector and are adjacent.
19688 for (unsigned i = 1; i != NumElems; ++i) {
19689 if ((Offset + i) != checkElem(N->getOperand(i)))
19690 return SDValue();
19691 }
19692
19693 SDLoc DL(N);
19694 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19695 Op0.getOperand(0).getOperand(1));
19696 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19697 VT, In);
19698}
19699
19700SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19701 EVT VT = N->getValueType(0);
19702
19703 // A vector built entirely of undefs is undef.
19704 if (ISD::allOperandsUndef(N))
19705 return DAG.getUNDEF(VT);
19706
19707 // If this is a splat of a bitcast from another vector, change to a
19708 // concat_vector.
19709 // For example:
19710 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19711 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19712 //
19713 // If X is a build_vector itself, the concat can become a larger build_vector.
19714 // TODO: Maybe this is useful for non-splat too?
19715 if (!LegalOperations) {
19716 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19717 Splat = peekThroughBitcasts(Splat);
19718 EVT SrcVT = Splat.getValueType();
19719 if (SrcVT.isVector()) {
19720 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19721 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19722 SrcVT.getVectorElementType(), NumElts);
19723 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19724 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19725 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19726 NewVT, Ops);
19727 return DAG.getBitcast(VT, Concat);
19728 }
19729 }
19730 }
19731 }
19732
19733 // Check if we can express the BUILD_VECTOR via a subvector extract.
19734 if (!LegalTypes && (N->getNumOperands() > 1)) {
19735 SDValue Op0 = N->getOperand(0);
19736 auto checkElem = [&](SDValue Op) -> uint64_t {
19737 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19738 (Op0.getOperand(0) == Op.getOperand(0)))
19739 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19740 return CNode->getZExtValue();
19741 return -1;
19742 };
19743
19744 int Offset = checkElem(Op0);
19745 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19746 if (Offset + i != checkElem(N->getOperand(i))) {
19747 Offset = -1;
19748 break;
19749 }
19750 }
19751
19752 if ((Offset == 0) &&
19753 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19754 return Op0.getOperand(0);
19755 if ((Offset != -1) &&
19756 ((Offset % N->getValueType(0).getVectorNumElements()) ==
19757 0)) // IDX must be multiple of output size.
19758 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19759 Op0.getOperand(0), Op0.getOperand(1));
19760 }
19761
19762 if (SDValue V = convertBuildVecZextToZext(N))
19763 return V;
19764
19765 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19766 return V;
19767
19768 if (SDValue V = reduceBuildVecTruncToBitCast(N))
19769 return V;
19770
19771 if (SDValue V = reduceBuildVecToShuffle(N))
19772 return V;
19773
19774 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19775 // Do this late as some of the above may replace the splat.
19776 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19777 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19778 assert(!V.isUndef() && "Splat of undef should have been handled earlier")((void)0);
19779 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19780 }
19781
19782 return SDValue();
19783}
19784
19785static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19787 EVT OpVT = N->getOperand(0).getValueType();
19788
19789 // If the operands are legal vectors, leave them alone.
19790 if (TLI.isTypeLegal(OpVT))
19791 return SDValue();
19792
19793 SDLoc DL(N);
19794 EVT VT = N->getValueType(0);
19795 SmallVector<SDValue, 8> Ops;
19796
19797 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19798 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19799
19800 // Keep track of what we encounter.
19801 bool AnyInteger = false;
19802 bool AnyFP = false;
19803 for (const SDValue &Op : N->ops()) {
19804 if (ISD::BITCAST == Op.getOpcode() &&
19805 !Op.getOperand(0).getValueType().isVector())
19806 Ops.push_back(Op.getOperand(0));
19807 else if (ISD::UNDEF == Op.getOpcode())
19808 Ops.push_back(ScalarUndef);
19809 else
19810 return SDValue();
19811
19812 // Note whether we encounter an integer or floating point scalar.
19813 // If it's neither, bail out, it could be something weird like x86mmx.
19814 EVT LastOpVT = Ops.back().getValueType();
19815 if (LastOpVT.isFloatingPoint())
19816 AnyFP = true;
19817 else if (LastOpVT.isInteger())
19818 AnyInteger = true;
19819 else
19820 return SDValue();
19821 }
19822
19823 // If any of the operands is a floating point scalar bitcast to a vector,
19824 // use floating point types throughout, and bitcast everything.
19825 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19826 if (AnyFP) {
19827 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19828 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19829 if (AnyInteger) {
19830 for (SDValue &Op : Ops) {
19831 if (Op.getValueType() == SVT)
19832 continue;
19833 if (Op.isUndef())
19834 Op = ScalarUndef;
19835 else
19836 Op = DAG.getBitcast(SVT, Op);
19837 }
19838 }
19839 }
19840
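// Rebuild the concatenation as a BUILD_VECTOR of the wide scalars and bitcast
// back to the original vector type, e.g. (assuming v2i16 is not a legal type
// here):
// concat (v2i16 (bitcast i32 X)), (v2i16 (bitcast i32 Y))
//   --> v4i16 (bitcast (v2i32 build_vector X, Y))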
19841 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19842 VT.getSizeInBits() / SVT.getSizeInBits());
19843 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19844}
19845
19846// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19847// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19848// most two distinct vectors the same size as the result, attempt to turn this
19849// into a legal shuffle.
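// For example, with a v8i32 result and v4i32 extracts:
// concat (extract_subvector X, 0), (extract_subvector Y, 0)
//   --> shuffle X, Y, <0,1,2,3,8,9,10,11>
// where X and Y are v8i32 vectors the same size as the result.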
19850static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19851 EVT VT = N->getValueType(0);
19852 EVT OpVT = N->getOperand(0).getValueType();
19853
19854 // We currently can't generate an appropriate shuffle for a scalable vector.
19855 if (VT.isScalableVector())
19856 return SDValue();
19857
19858 int NumElts = VT.getVectorNumElements();
19859 int NumOpElts = OpVT.getVectorNumElements();
19860
19861 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19862 SmallVector<int, 8> Mask;
19863
19864 for (SDValue Op : N->ops()) {
19865 Op = peekThroughBitcasts(Op);
19866
19867 // UNDEF nodes convert to UNDEF shuffle mask values.
19868 if (Op.isUndef()) {
19869 Mask.append((unsigned)NumOpElts, -1);
19870 continue;
19871 }
19872
19873 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19874 return SDValue();
19875
19876 // What vector are we extracting the subvector from and at what index?
19877 SDValue ExtVec = Op.getOperand(0);
19878 int ExtIdx = Op.getConstantOperandVal(1);
19879
19880 // We want the EVT of the original extraction to correctly scale the
19881 // extraction index.
19882 EVT ExtVT = ExtVec.getValueType();
19883 ExtVec = peekThroughBitcasts(ExtVec);
19884
19885 // UNDEF nodes convert to UNDEF shuffle mask values.
19886 if (ExtVec.isUndef()) {
19887 Mask.append((unsigned)NumOpElts, -1);
19888 continue;
19889 }
19890
19891 // Ensure that we are extracting a subvector from a vector the same
19892 // size as the result.
19893 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19894 return SDValue();
19895
19896 // Scale the subvector index to account for any bitcast.
19897 int NumExtElts = ExtVT.getVectorNumElements();
19898 if (0 == (NumExtElts % NumElts))
19899 ExtIdx /= (NumExtElts / NumElts);
19900 else if (0 == (NumElts % NumExtElts))
19901 ExtIdx *= (NumElts / NumExtElts);
19902 else
19903 return SDValue();
19904
19905 // At most we can reference 2 inputs in the final shuffle.
19906 if (SV0.isUndef() || SV0 == ExtVec) {
19907 SV0 = ExtVec;
19908 for (int i = 0; i != NumOpElts; ++i)
19909 Mask.push_back(i + ExtIdx);
19910 } else if (SV1.isUndef() || SV1 == ExtVec) {
19911 SV1 = ExtVec;
19912 for (int i = 0; i != NumOpElts; ++i)
19913 Mask.push_back(i + ExtIdx + NumElts);
19914 } else {
19915 return SDValue();
19916 }
19917 }
19918
19919 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19920 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19921 DAG.getBitcast(VT, SV1), Mask, DAG);
19922}
19923
19924static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19925 unsigned CastOpcode = N->getOperand(0).getOpcode();
19926 switch (CastOpcode) {
19927 case ISD::SINT_TO_FP:
19928 case ISD::UINT_TO_FP:
19929 case ISD::FP_TO_SINT:
19930 case ISD::FP_TO_UINT:
19931 // TODO: Allow more opcodes?
19932 // case ISD::BITCAST:
19933 // case ISD::TRUNCATE:
19934 // case ISD::ZERO_EXTEND:
19935 // case ISD::SIGN_EXTEND:
19936 // case ISD::FP_EXTEND:
19937 break;
19938 default:
19939 return SDValue();
19940 }
19941
19942 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19943 if (!SrcVT.isVector())
19944 return SDValue();
19945
19946 // All operands of the concat must be the same kind of cast from the same
19947 // source type.
19948 SmallVector<SDValue, 4> SrcOps;
19949 for (SDValue Op : N->ops()) {
19950 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19951 Op.getOperand(0).getValueType() != SrcVT)
19952 return SDValue();
19953 SrcOps.push_back(Op.getOperand(0));
19954 }
19955
19956 // The wider cast must be supported by the target. This is unusual because
19957 // the operation support type parameter depends on the opcode. In addition,
19958 // check the other type in the cast to make sure this is really legal.
19959 EVT VT = N->getValueType(0);
19960 EVT SrcEltVT = SrcVT.getVectorElementType();
19961 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19962 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19963 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19964 switch (CastOpcode) {
19965 case ISD::SINT_TO_FP:
19966 case ISD::UINT_TO_FP:
19967 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19968 !TLI.isTypeLegal(VT))
19969 return SDValue();
19970 break;
19971 case ISD::FP_TO_SINT:
19972 case ISD::FP_TO_UINT:
19973 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19974 !TLI.isTypeLegal(ConcatSrcVT))
19975 return SDValue();
19976 break;
19977 default:
19978 llvm_unreachable("Unexpected cast opcode")__builtin_unreachable();
19979 }
19980
19981 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19982 SDLoc DL(N);
19983 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19984 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19985}
19986
19987SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19988 // If we only have one input vector, we don't need to do any concatenation.
19989 if (N->getNumOperands() == 1)
19990 return N->getOperand(0);
19991
19992 // Check if all of the operands are undefs.
19993 EVT VT = N->getValueType(0);
19994 if (ISD::allOperandsUndef(N))
19995 return DAG.getUNDEF(VT);
19996
19997 // Optimize concat_vectors where all but the first of the vectors are undef.
19998 if (all_of(drop_begin(N->ops()),
19999 [](const SDValue &Op) { return Op.isUndef(); })) {
20000 SDValue In = N->getOperand(0);
20001 assert(In.getValueType().isVector() && "Must concat vectors")((void)0);
20002
20003 // If the input is a concat_vectors, just make a larger concat by padding
20004 // with smaller undefs.
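// e.g. concat (concat X, Y), undef --> concat X, Y, undef, undef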
20005 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20006 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20007 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20008 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20009 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20010 }
20011
20012 SDValue Scalar = peekThroughOneUseBitcasts(In);
20013
20014 // concat_vectors(scalar_to_vector(scalar), undef) ->
20015 // scalar_to_vector(scalar)
20016 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20017 Scalar.hasOneUse()) {
20018 EVT SVT = Scalar.getValueType().getVectorElementType();
20019 if (SVT == Scalar.getOperand(0).getValueType())
20020 Scalar = Scalar.getOperand(0);
20021 }
20022
20023 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20024 if (!Scalar.getValueType().isVector()) {
20025 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20026 // look through the trunc so we can still do the transform:
20027 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20028 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20029 !TLI.isTypeLegal(Scalar.getValueType()) &&
20030 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20031 Scalar = Scalar->getOperand(0);
20032
20033 EVT SclTy = Scalar.getValueType();
20034
20035 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20036 return SDValue();
20037
20038 // Bail out if the vector size is not a multiple of the scalar size.
20039 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20040 return SDValue();
20041
20042 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20043 if (VNTNumElms < 2)
20044 return SDValue();
20045
20046 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20047 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20048 return SDValue();
20049
20050 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20051 return DAG.getBitcast(VT, Res);
20052 }
20053 }
20054
20055 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20056 // We have already tested above for an UNDEF only concatenation.
20057 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20058 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20059 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20060 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20061 };
20062 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20063 SmallVector<SDValue, 8> Opnds;
20064 EVT SVT = VT.getScalarType();
20065
20066 EVT MinVT = SVT;
20067 if (!SVT.isFloatingPoint()) {
20068 // If the BUILD_VECTORs were built from integers, they may have different
20069 // operand types. Get the smallest type and truncate all operands to it.
20070 bool FoundMinVT = false;
20071 for (const SDValue &Op : N->ops())
20072 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20073 EVT OpSVT = Op.getOperand(0).getValueType();
20074 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20075 FoundMinVT = true;
20076 }
20077 assert(FoundMinVT && "Concat vector type mismatch")((void)0);
20078 }
20079
20080 for (const SDValue &Op : N->ops()) {
20081 EVT OpVT = Op.getValueType();
20082 unsigned NumElts = OpVT.getVectorNumElements();
20083
20084 if (ISD::UNDEF == Op.getOpcode())
20085 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20086
20087 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20088 if (SVT.isFloatingPoint()) {
20089 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch")((void)0);
20090 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20091 } else {
20092 for (unsigned i = 0; i != NumElts; ++i)
20093 Opnds.push_back(
20094 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20095 }
20096 }
20097 }
20098
20099 assert(VT.getVectorNumElements() == Opnds.size() &&((void)0)
20100 "Concat vector type mismatch")((void)0);
20101 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20102 }
20103
20104 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20105 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20106 return V;
20107
20108 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20109 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
20110 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20111 return V;
20112
20113 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20114 return V;
20115
20116 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20117 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20118 // operands and look for CONCAT operations that place the incoming vectors
20119 // at the exact same location.
20120 //
20121 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
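// e.g. with v4i32 operands and a v8i32 result:
// concat (extract_subvector X, 0), (extract_subvector X, 4) --> X
// where X is the single v8i32 source vector.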
20122 SDValue SingleSource = SDValue();
20123 unsigned PartNumElem =
20124 N->getOperand(0).getValueType().getVectorMinNumElements();
20125
20126 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20127 SDValue Op = N->getOperand(i);
20128
20129 if (Op.isUndef())
20130 continue;
20131
20132 // Check if this is the identity extract:
20133 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20134 return SDValue();
20135
20136 // Find the single incoming vector for the extract_subvector.
20137 if (SingleSource.getNode()) {
20138 if (Op.getOperand(0) != SingleSource)
20139 return SDValue();
20140 } else {
20141 SingleSource = Op.getOperand(0);
20142
20143 // Check the source type is the same as the type of the result.
20144 // If not, this concat may extend the vector, so we cannot
20145 // optimize it away.
20146 if (SingleSource.getValueType() != N->getValueType(0))
20147 return SDValue();
20148 }
20149
20150 // Check that we are reading from the identity index.
20151 unsigned IdentityIndex = i * PartNumElem;
20152 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20153 return SDValue();
20154 }
20155
20156 if (SingleSource.getNode())
20157 return SingleSource;
20158
20159 return SDValue();
20160}
20161
20162// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20163// if the subvector can be sourced for free.
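// e.g. getSubVectorSrc (insert_subvector ?, X, Index), Index, SubVT --> X
//      when X has type SubVT, and
//      getSubVectorSrc (concat_vectors A, B), Index, SubVT --> B
//      when A has type SubVT and Index equals A's element count.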
20164static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20165 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20166 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20167 return V.getOperand(1);
20168 }
20169 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20170 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20171 V.getOperand(0).getValueType() == SubVT &&
20172 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20173 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20174 return V.getOperand(SubIdx);
20175 }
20176 return SDValue();
20177}
20178
20179static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20180 SelectionDAG &DAG,
20181 bool LegalOperations) {
20182 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20183 SDValue BinOp = Extract->getOperand(0);
20184 unsigned BinOpcode = BinOp.getOpcode();
20185 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20186 return SDValue();
20187
20188 EVT VecVT = BinOp.getValueType();
20189 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20190 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20191 return SDValue();
20192
20193 SDValue Index = Extract->getOperand(1);
20194 EVT SubVT = Extract->getValueType(0);
20195 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20196 return SDValue();
20197
20198 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20199 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20200
20201 // TODO: We could handle the case where only 1 operand is being inserted by
20202 // creating an extract of the other operand, but that requires checking
20203 // number of uses and/or costs.
20204 if (!Sub0 || !Sub1)
20205 return SDValue();
20206
20207 // We are inserting both operands of the wide binop only to extract back
20208 // to the narrow vector size. Eliminate all of the insert/extract:
20209 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20210 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20211 BinOp->getFlags());
20212}
20213
20214/// If we are extracting a subvector produced by a wide binary operator try
20215/// to use a narrow binary operator and/or avoid concatenation and extraction.
20216static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20217 bool LegalOperations) {
20218 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20219 // some of these bailouts with other transforms.
20220
20221 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20222 return V;
20223
20224 // The extract index must be a constant, so we can map it to a concat operand.
20225 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20226 if (!ExtractIndexC)
20227 return SDValue();
20228
20229 // We are looking for an optionally bitcasted wide vector binary operator
20230 // feeding an extract subvector.
20231 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20232 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20233 unsigned BOpcode = BinOp.getOpcode();
20234 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20235 return SDValue();
20236
20237 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20238 // reduced to the unary fneg when it is visited, and we probably want to deal
20239 // with fneg in a target-specific way.
20240 if (BOpcode == ISD::FSUB) {
20241 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20242 if (C && C->getValueAPF().isNegZero())
20243 return SDValue();
20244 }
20245
20246 // The binop must be a vector type, so we can extract some fraction of it.
20247 EVT WideBVT = BinOp.getValueType();
20248 // The optimisations below currently assume we are dealing with fixed length
20249 // vectors. It is possible to add support for scalable vectors, but at the
20250 // moment we've done no analysis to prove whether they are profitable or not.
20251 if (!WideBVT.isFixedLengthVector())
20252 return SDValue();
20253
20254 EVT VT = Extract->getValueType(0);
20255 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20256 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&((void)0)
20257 "Extract index is not a multiple of the vector length.")((void)0);
20258
20259 // Bail out if this is not a proper multiple width extraction.
20260 unsigned WideWidth = WideBVT.getSizeInBits();
20261 unsigned NarrowWidth = VT.getSizeInBits();
20262 if (WideWidth % NarrowWidth != 0)
20263 return SDValue();
20264
20265 // Bail out if we are extracting a fraction of a single operation. This can
20266 // occur because we potentially looked through a bitcast of the binop.
20267 unsigned NarrowingRatio = WideWidth / NarrowWidth;
20268 unsigned WideNumElts = WideBVT.getVectorNumElements();
20269 if (WideNumElts % NarrowingRatio != 0)
20270 return SDValue();
20271
20272 // Bail out if the target does not support a narrower version of the binop.
20273 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20274 WideNumElts / NarrowingRatio);
20275 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20276 return SDValue();
20277
20278 // If extraction is cheap, we don't need to look at the binop operands
20279 // for concat ops. The narrow binop alone makes this transform profitable.
20280 // We can't just reuse the original extract index operand because we may have
20281 // bitcasted.
20282 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20283 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20284 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20285 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20286 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20287 SDLoc DL(Extract);
20288 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20289 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20290 BinOp.getOperand(0), NewExtIndex);
20291 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20292 BinOp.getOperand(1), NewExtIndex);
20293 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20294 BinOp.getNode()->getFlags());
20295 return DAG.getBitcast(VT, NarrowBinOp);
20296 }
20297
20298 // Only handle the case where we are doubling and then halving. A larger ratio
20299 // may require more than two narrow binops to replace the wide binop.
20300 if (NarrowingRatio != 2)
20301 return SDValue();
20302
20303 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20304 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20305 // flavors, but no other 256-bit integer support. This could be extended to
20306 // handle any binop, but that may require fixing/adding other folds to avoid
20307 // codegen regressions.
20308 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20309 return SDValue();
20310
20311 // We need at least one concatenation operation of a binop operand to make
20312 // this transform worthwhile. The concat must double the input vector sizes.
20313 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20314 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20315 return V.getOperand(ConcatOpNum);
20316 return SDValue();
20317 };
20318 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20319 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20320
20321 if (SubVecL || SubVecR) {
20322 // If a binop operand was not the result of a concat, we must extract a
20323 // half-sized operand for our new narrow binop:
20324 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20325 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20326 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20327 SDLoc DL(Extract);
20328 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20329 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20330 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20331 BinOp.getOperand(0), IndexC);
20332
20333 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20334 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20335 BinOp.getOperand(1), IndexC);
20336
20337 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20338 return DAG.getBitcast(VT, NarrowBinOp);
20339 }
20340
20341 return SDValue();
20342}
20343
20344/// If we are extracting a subvector from a wide vector load, convert to a
20345/// narrow load to eliminate the extraction:
20346/// (extract_subvector (load wide vector)) --> (load narrow vector)
20347static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20348 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20349 if (DAG.getDataLayout().isBigEndian())
20350 return SDValue();
20351
20352 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20353 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20354 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
20355 !ExtIdx)
20356 return SDValue();
20357
20358 // Allow targets to opt-out.
20359 EVT VT = Extract->getValueType(0);
20360
20361 // We can only create byte sized loads.
20362 if (!VT.isByteSized())
20363 return SDValue();
20364
20365 unsigned Index = ExtIdx->getZExtValue();
20366 unsigned NumElts = VT.getVectorMinNumElements();
20367
20368 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20369 // multiple of the minimum number of elements in the result type.
20370 assert(Index % NumElts == 0 && "The extract subvector index is not a "((void)0)
20371 "multiple of the result's element count")((void)0);
20372
20373 // It's fine to use TypeSize here as we know the offset will not be negative.
20374 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20375
20376 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20377 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20378 return SDValue();
20379
20380 // The narrow load will be offset from the base address of the old load if
20381 // we are extracting from something besides index 0 (little-endian).
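// e.g. extracting the upper v2f64 half (Index == 2) of a loaded v4f64 uses a
// byte offset of getStoreSize(v2f64) * (2 / 2) == 16 from the original base.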
20382 SDLoc DL(Extract);
20383
20384 // TODO: Use "BaseIndexOffset" to make this more effective.
20385 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20386
20387 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20388 MachineFunction &MF = DAG.getMachineFunction();
20389 MachineMemOperand *MMO;
20390 if (Offset.isScalable()) {
20391 MachinePointerInfo MPI =
20392 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20393 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20394 } else
20395 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20396 StoreSize);
20397
20398 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20399 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20400 return NewLd;
20401}
20402
20403SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20404 EVT NVT = N->getValueType(0);
20405 SDValue V = N->getOperand(0);
20406 uint64_t ExtIdx = N->getConstantOperandVal(1);
20407
20408 // Extract from UNDEF is UNDEF.
20409 if (V.isUndef())
20410 return DAG.getUNDEF(NVT);
20411
20412 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20413 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20414 return NarrowLoad;
20415
20416 // Combine an extract of an extract into a single extract_subvector.
20417 // ext (ext X, C), 0 --> ext X, C
20418 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20419 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20420 V.getConstantOperandVal(1)) &&
20421 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20422 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20423 V.getOperand(1));
20424 }
20425 }
20426
20427 // Try to move vector bitcast after extract_subv by scaling extraction index:
20428 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
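// e.g. if the narrower v2i64 extract is legal or custom:
// v4i32 extract_subvector (v8i32 bitcast (v4i64 X)), 4
//   --> v4i32 bitcast (v2i64 extract_subvector X, 2)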
20429 if (V.getOpcode() == ISD::BITCAST &&
20430 V.getOperand(0).getValueType().isVector() &&
20431 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20432 SDValue SrcOp = V.getOperand(0);
20433 EVT SrcVT = SrcOp.getValueType();
20434 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20435 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20436 if ((SrcNumElts % DestNumElts) == 0) {
20437 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20438 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20439 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20440 NewExtEC);
20441 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20442 SDLoc DL(N);
20443 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20444 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20445 V.getOperand(0), NewIndex);
20446 return DAG.getBitcast(NVT, NewExtract);
20447 }
20448 }
20449 if ((DestNumElts % SrcNumElts) == 0) {
20450 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20451 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20452 ElementCount NewExtEC =
20453 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20454 EVT ScalarVT = SrcVT.getScalarType();
20455 if ((ExtIdx % DestSrcRatio) == 0) {
20456 SDLoc DL(N);
20457 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20458 EVT NewExtVT =
20459 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20460 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20461 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20462 SDValue NewExtract =
20463 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20464 V.getOperand(0), NewIndex);
20465 return DAG.getBitcast(NVT, NewExtract);
20466 }
20467 if (NewExtEC.isScalar() &&
20468 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20469 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20470 SDValue NewExtract =
20471 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20472 V.getOperand(0), NewIndex);
20473 return DAG.getBitcast(NVT, NewExtract);
20474 }
20475 }
20476 }
20477 }
20478 }
20479
20480 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20481 unsigned ExtNumElts = NVT.getVectorMinNumElements();
20482 EVT ConcatSrcVT = V.getOperand(0).getValueType();
20483 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&((void)0)
20484 "Concat and extract subvector do not change element type")((void)0);
20485 assert((ExtIdx % ExtNumElts) == 0 &&((void)0)
20486 "Extract index is not a multiple of the input vector length.")((void)0);
20487
20488 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20489 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20490
20491 // If the concatenated source types match this extract, it's a direct
20492 // simplification:
20493 // extract_subvec (concat V1, V2, ...), i --> Vi
20494 if (ConcatSrcNumElts == ExtNumElts)
20495 return V.getOperand(ConcatOpIdx);
20496
20497 // If the concatenated source vectors are a multiple of the length of this extract,
20498 // then extract a fraction of one of those source vectors directly from a
20499 // concat operand. Example:
20500 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
20501 // v2i8 extract_subvec v8i8 Y, 6
20502 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20503 SDLoc DL(N);
20504 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20505 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&((void)0)
20506 "Trying to extract from >1 concat operand?")((void)0);
20507 assert(NewExtIdx % ExtNumElts == 0 &&((void)0)
20508 "Extract index is not a multiple of the input vector length.")((void)0);
20509 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20510 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20511 V.getOperand(ConcatOpIdx), NewIndexC);
20512 }
20513 }
20514
20515 V = peekThroughBitcasts(V);
20516
20517 // If the input is a build vector, try to make a smaller build vector.
20518 if (V.getOpcode() == ISD::BUILD_VECTOR) {
20519 EVT InVT = V.getValueType();
20520 unsigned ExtractSize = NVT.getSizeInBits();
20521 unsigned EltSize = InVT.getScalarSizeInBits();
20522 // Only do this if we won't split any elements.
20523 if (ExtractSize % EltSize == 0) {
20524 unsigned NumElems = ExtractSize / EltSize;
20525 EVT EltVT = InVT.getVectorElementType();
20526 EVT ExtractVT =
20527 NumElems == 1 ? EltVT
20528 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20529 if ((Level < AfterLegalizeDAG ||
20530 (NumElems == 1 ||
20531 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20532 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20533 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20534
20535 if (NumElems == 1) {
20536 SDValue Src = V->getOperand(IdxVal);
20537 if (EltVT != Src.getValueType())
20538 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
20539 return DAG.getBitcast(NVT, Src);
20540 }
20541
20542 // Extract the pieces from the original build_vector.
20543 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20544 V->ops().slice(IdxVal, NumElems));
20545 return DAG.getBitcast(NVT, BuildVec);
20546 }
20547 }
20548 }
20549
20550 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20551 // Handle only the simple case where the vector being inserted and the
20552 // vector being extracted are the same size.
20553 EVT SmallVT = V.getOperand(1).getValueType();
20554 if (!NVT.bitsEq(SmallVT))
20555 return SDValue();
20556
20557 // Combine:
20558 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20559 // Into:
20560 // indices are equal or bit offsets are equal => V2
20561 // otherwise => (extract_subvec V1, ExtIdx)
20562 uint64_t InsIdx = V.getConstantOperandVal(2);
20563 if (InsIdx * SmallVT.getScalarSizeInBits() ==
20564 ExtIdx * NVT.getScalarSizeInBits()) {
20565 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
20566 return SDValue();
20567
20568 return DAG.getBitcast(NVT, V.getOperand(1));
20569 }
20570 return DAG.getNode(
20571 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20572 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20573 N->getOperand(1));
20574 }
20575
20576 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20577 return NarrowBOp;
20578
20579 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20580 return SDValue(N, 0);
20581
20582 return SDValue();
20583}
20584
20585/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20586/// followed by concatenation. Narrow vector ops may have better performance
20587/// than wide ops, and this can unlock further narrowing of other vector ops.
20588/// Targets can invert this transform later if it is not profitable.
20589static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20590 SelectionDAG &DAG) {
20591 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20592 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20593 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20594 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20595 return SDValue();
20596
20597 // Split the wide shuffle mask into halves. Any mask element that is accessing
20598 // operand 1 is offset down to account for narrowing of the vectors.
20599 ArrayRef<int> Mask = Shuf->getMask();
20600 EVT VT = Shuf->getValueType(0);
20601 unsigned NumElts = VT.getVectorNumElements();
20602 unsigned HalfNumElts = NumElts / 2;
20603 SmallVector<int, 16> Mask0(HalfNumElts, -1);
20604 SmallVector<int, 16> Mask1(HalfNumElts, -1);
20605 for (unsigned i = 0; i != NumElts; ++i) {
20606 if (Mask[i] == -1)
20607 continue;
20608 // If we reference the upper (undef) subvector then the element is undef.
20609 if ((Mask[i] % NumElts) >= HalfNumElts)
20610 continue;
20611 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20612 if (i < HalfNumElts)
20613 Mask0[i] = M;
20614 else
20615 Mask1[i - HalfNumElts] = M;
20616 }
20617
20618 // Ask the target if this is a valid transform.
20619 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20620 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20621 HalfNumElts);
20622 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20623 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20624 return SDValue();
20625
20626 // shuffle (concat X, undef), (concat Y, undef), Mask -->
20627 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20628 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20629 SDLoc DL(Shuf);
20630 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20631 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20632 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20633}
20634
20635// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20636 // or to turn a shuffle of a single concat into a simpler shuffle followed by a concat.
20637static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20638 EVT VT = N->getValueType(0);
20639 unsigned NumElts = VT.getVectorNumElements();
20640
20641 SDValue N0 = N->getOperand(0);
20642 SDValue N1 = N->getOperand(1);
20643 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20644 ArrayRef<int> Mask = SVN->getMask();
20645
20646 SmallVector<SDValue, 4> Ops;
20647 EVT ConcatVT = N0.getOperand(0).getValueType();
20648 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20649 unsigned NumConcats = NumElts / NumElemsPerConcat;
20650
20651 auto IsUndefMaskElt = [](int i) { return i == -1; };
20652
20653 // Special case: shuffle(concat(A,B)) can be more efficiently represented
20654 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20655 // half vector elements.
20656 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20657 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20658 IsUndefMaskElt)) {
20659 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20660 N0.getOperand(1),
20661 Mask.slice(0, NumElemsPerConcat));
20662 N1 = DAG.getUNDEF(ConcatVT);
20663 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20664 }
20665
20666 // Look at every vector that's inserted. We're looking for exact
20667 // subvector-sized copies from a concatenated vector.
20668 for (unsigned I = 0; I != NumConcats; ++I) {
20669 unsigned Begin = I * NumElemsPerConcat;
20670 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20671
20672 // Make sure we're dealing with a copy.
20673 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20674 Ops.push_back(DAG.getUNDEF(ConcatVT));
20675 continue;
20676 }
20677
20678 int OpIdx = -1;
20679 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20680 if (IsUndefMaskElt(SubMask[i]))
20681 continue;
20682 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20683 return SDValue();
20684 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20685 if (0 <= OpIdx && EltOpIdx != OpIdx)
20686 return SDValue();
20687 OpIdx = EltOpIdx;
20688 }
20689 assert(0 <= OpIdx && "Unknown concat_vectors op")((void)0);
20690
20691 if (OpIdx < (int)N0.getNumOperands())
20692 Ops.push_back(N0.getOperand(OpIdx));
20693 else
20694 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20695 }
20696
20697 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20698}
20699
20700// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20701// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20702//
20703// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20704// a simplification in some sense, but it isn't appropriate in general: some
20705// BUILD_VECTORs are substantially cheaper than others. The general case
20706// of a BUILD_VECTOR requires inserting each element individually (or
20707// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20708// all constants is a single constant pool load. A BUILD_VECTOR where each
20709// element is identical is a splat. A BUILD_VECTOR where most of the operands
20710// are undef lowers to a small number of element insertions.
20711//
20712// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20713// We don't fold shuffles where one side is a non-zero constant, and we don't
20714// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20715// non-constant operands. This seems to work out reasonably well in practice.
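// e.g. (assuming the build_vector operands have no other uses):
// shuffle (build_vector A, B), (build_vector 0, 0), <0,2> --> build_vector A, 0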
20716static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20717 SelectionDAG &DAG,
20718 const TargetLowering &TLI) {
20719 EVT VT = SVN->getValueType(0);
20720 unsigned NumElts = VT.getVectorNumElements();
20721 SDValue N0 = SVN->getOperand(0);
20722 SDValue N1 = SVN->getOperand(1);
20723
20724 if (!N0->hasOneUse())
20725 return SDValue();
20726
20727 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
20728 // discussed above.
20729 if (!N1.isUndef()) {
20730 if (!N1->hasOneUse())
20731 return SDValue();
20732
20733 bool N0AnyConst = isAnyConstantBuildVector(N0);
20734 bool N1AnyConst = isAnyConstantBuildVector(N1);
20735 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20736 return SDValue();
20737 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20738 return SDValue();
20739 }
20740
20741 // If both inputs are splats of the same value then we can safely merge this
20742 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20743 bool IsSplat = false;
20744 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20745 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20746 if (BV0 && BV1)
20747 if (SDValue Splat0 = BV0->getSplatValue())
20748 IsSplat = (Splat0 == BV1->getSplatValue());
20749
20750 SmallVector<SDValue, 8> Ops;
20751 SmallSet<SDValue, 16> DuplicateOps;
20752 for (int M : SVN->getMask()) {
20753 SDValue Op = DAG.getUNDEF(VT.getScalarType());
20754 if (M >= 0) {
20755 int Idx = M < (int)NumElts ? M : M - NumElts;
20756 SDValue &S = (M < (int)NumElts ? N0 : N1);
20757 if (S.getOpcode() == ISD::BUILD_VECTOR) {
20758 Op = S.getOperand(Idx);
20759 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20760 SDValue Op0 = S.getOperand(0);
20761 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20762 } else {
20763 // Operand can't be combined - bail out.
20764 return SDValue();
20765 }
20766 }
20767
20768 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20769 // generating a splat; semantically, this is fine, but it's likely to
20770 // generate low-quality code if the target can't reconstruct an appropriate
20771 // shuffle.
20772 if (!Op.isUndef() && !isIntOrFPConstant(Op))
20773 if (!IsSplat && !DuplicateOps.insert(Op).second)
20774 return SDValue();
20775
20776 Ops.push_back(Op);
20777 }
20778
20779 // BUILD_VECTOR requires all inputs to be of the same type; find the
20780 // maximum type and extend them all.
20781 EVT SVT = VT.getScalarType();
20782 if (SVT.isInteger())
20783 for (SDValue &Op : Ops)
20784 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20785 if (SVT != VT.getScalarType())
20786 for (SDValue &Op : Ops)
20787 Op = TLI.isZExtFree(Op.getValueType(), SVT)
20788 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20789 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20790 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20791}
20792
20793// Match shuffles that can be converted to any_vector_extend_in_reg.
20794// This is often generated during legalization.
20795// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20796// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20797static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20798 SelectionDAG &DAG,
20799 const TargetLowering &TLI,
20800 bool LegalOperations) {
20801 EVT VT = SVN->getValueType(0);
20802 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20803
20804 // TODO Add support for big-endian when we have a test case.
20805 if (!VT.isInteger() || IsBigEndian)
20806 return SDValue();
20807
20808 unsigned NumElts = VT.getVectorNumElements();
20809 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20810 ArrayRef<int> Mask = SVN->getMask();
20811 SDValue N0 = SVN->getOperand(0);
20812
20813 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20814 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20815 for (unsigned i = 0; i != NumElts; ++i) {
20816 if (Mask[i] < 0)
20817 continue;
20818 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20819 continue;
20820 return false;
20821 }
20822 return true;
20823 };
20824
20825 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
20826 // power-of-2 extensions as they are the most likely.
20827 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20828 // Check for non power of 2 vector sizes
20829 if (NumElts % Scale != 0)
20830 continue;
20831 if (!isAnyExtend(Scale))
20832 continue;
20833
20834 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20835 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20836 // Never create an illegal type. Only create unsupported operations if we
20837 // are pre-legalization.
20838 if (TLI.isTypeLegal(OutVT))
20839 if (!LegalOperations ||
20840 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20841 return DAG.getBitcast(VT,
20842 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20843 SDLoc(SVN), OutVT, N0));
20844 }
20845
20846 return SDValue();
20847}
20848
20849// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20850// each source element of a large type into the lowest elements of a smaller
20851// destination type. This is often generated during legalization.
20852// If the source node itself was a '*_extend_vector_inreg' node then we should
20853 // be able to remove it.
20854static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20855 SelectionDAG &DAG) {
20856 EVT VT = SVN->getValueType(0);
20857 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20858
20859 // TODO Add support for big-endian when we have a test case.
20860 if (!VT.isInteger() || IsBigEndian)
20861 return SDValue();
20862
20863 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20864
20865 unsigned Opcode = N0.getOpcode();
20866 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20867 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20868 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20869 return SDValue();
20870
20871 SDValue N00 = N0.getOperand(0);
20872 ArrayRef<int> Mask = SVN->getMask();
20873 unsigned NumElts = VT.getVectorNumElements();
20874 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20875 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20876 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20877
20878 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20879 return SDValue();
20880 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20881
20882 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20883 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20884 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20885 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20886 for (unsigned i = 0; i != NumElts; ++i) {
20887 if (Mask[i] < 0)
20888 continue;
20889 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20890 continue;
20891 return false;
20892 }
20893 return true;
20894 };
20895
20896 // At the moment we just handle the case where we've truncated back to the
20897 // same size as before the extension.
20898 // TODO: handle more extension/truncation cases as cases arise.
20899 if (EltSizeInBits != ExtSrcSizeInBits)
20900 return SDValue();
20901
20902 // We can remove *extend_vector_inreg only if the truncation happens at
20903 // the same scale as the extension.
20904 if (isTruncate(ExtScale))
20905 return DAG.getBitcast(VT, N00);
20906
20907 return SDValue();
20908}
20909
20910// Combine shuffles of splat-shuffles of the form:
20911// shuffle (shuffle V, undef, splat-mask), undef, M
20912// If splat-mask contains undef elements, we need to be careful about
20913 // introducing undefs in the folded mask which are not the result of composing
20914// the masks of the shuffles.
20915static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20916 SelectionDAG &DAG) {
20917 if (!Shuf->getOperand(1).isUndef())
20918 return SDValue();
20919 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20920 if (!Splat || !Splat->isSplat())
20921 return SDValue();
20922
20923 ArrayRef<int> ShufMask = Shuf->getMask();
20924 ArrayRef<int> SplatMask = Splat->getMask();
20925 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch")((void)0);
20926
20927 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20928 // every undef mask element in the splat-shuffle has a corresponding undef
20929 // element in the user-shuffle's mask or if the composition of mask elements
20930 // would result in undef.
20931 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20932 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
20933 // In this case it is not legal to simplify to the splat-shuffle because we
20934 // may be exposing to the users of the shuffle an undef element at index 1
20935 // which was not there before the combine.
20936 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20937 // In this case the composition of masks yields SplatMask, so it's ok to
20938 // simplify to the splat-shuffle.
20939 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20940 // In this case the composed mask includes all undef elements of SplatMask
20941 // and in addition sets element zero to undef. It is safe to simplify to
20942 // the splat-shuffle.
20943 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20944 ArrayRef<int> SplatMask) {
20945 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20946 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20947 SplatMask[UserMask[i]] != -1)
20948 return false;
20949 return true;
20950 };
20951 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20952 return Shuf->getOperand(0);
20953
20954 // Create a new shuffle with a mask that is composed of the two shuffles'
20955 // masks.
20956 SmallVector<int, 32> NewMask;
20957 for (int Idx : ShufMask)
20958 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20959
20960 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20961 Splat->getOperand(0), Splat->getOperand(1),
20962 NewMask);
20963}
20964
20965/// Combine shuffle of shuffle of the form:
20966/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20967static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20968 SelectionDAG &DAG) {
20969 if (!OuterShuf->getOperand(1).isUndef())
20970 return SDValue();
20971 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20972 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20973 return SDValue();
20974
20975 ArrayRef<int> OuterMask = OuterShuf->getMask();
20976 ArrayRef<int> InnerMask = InnerShuf->getMask();
20977 unsigned NumElts = OuterMask.size();
20978 assert(NumElts == InnerMask.size() && "Mask length mismatch")((void)0);
20979 SmallVector<int, 32> CombinedMask(NumElts, -1);
20980 int SplatIndex = -1;
20981 for (unsigned i = 0; i != NumElts; ++i) {
20982 // Undef lanes remain undef.
20983 int OuterMaskElt = OuterMask[i];
20984 if (OuterMaskElt == -1)
20985 continue;
20986
20987 // Peek through the shuffle masks to get the underlying source element.
20988 int InnerMaskElt = InnerMask[OuterMaskElt];
20989 if (InnerMaskElt == -1)
20990 continue;
20991
20992 // Initialize the splatted element.
20993 if (SplatIndex == -1)
20994 SplatIndex = InnerMaskElt;
20995
20996 // Non-matching index - this is not a splat.
20997 if (SplatIndex != InnerMaskElt)
20998 return SDValue();
20999
21000 CombinedMask[i] = InnerMaskElt;
21001 }
21002 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||((void)0)
21003 getSplatIndex(CombinedMask) != -1) &&((void)0)
21004 "Expected a splat mask")((void)0);
21005
21006 // TODO: The transform may be a win even if the mask is not legal.
21007 EVT VT = OuterShuf->getValueType(0);
21008 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types")((void)0);
21009 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21010 return SDValue();
21011
21012 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21013 InnerShuf->getOperand(1), CombinedMask);
21014}
21015
21016/// If the shuffle mask is taking exactly one element from the first vector
21017/// operand and passing through all other elements from the second vector
21018/// operand, return the index of the mask element that is choosing an element
21019/// from the first operand. Otherwise, return -1.
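/// e.g. for the 4-element mask <4,5,0,7> this returns 2: only mask element 2
/// selects from operand 0, and every other lane passes operand 1 through
/// unchanged.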
21020static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21021 int MaskSize = Mask.size();
21022 int EltFromOp0 = -1;
21023 // TODO: This does not match if there are undef elements in the shuffle mask.
21024 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21025 // removing an instruction (a shuffle), but losing the knowledge that some
21026 // vector lanes are not needed.
21027 for (int i = 0; i != MaskSize; ++i) {
21028 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21029 // We're looking for a shuffle of exactly one element from operand 0.
21030 if (EltFromOp0 != -1)
21031 return -1;
21032 EltFromOp0 = i;
21033 } else if (Mask[i] != i + MaskSize) {
21034 // Nothing from operand 1 can change lanes.
21035 return -1;
21036 }
21037 }
21038 return EltFromOp0;
21039}
21040
21041/// If a shuffle inserts exactly one element from a source vector operand into
21042/// another vector operand and we can access the specified element as a scalar,
21043/// then we can eliminate the shuffle.
21044static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21045 SelectionDAG &DAG) {
21046 // First, check if we are taking one element of a vector and shuffling that
21047 // element into another vector.
21048 ArrayRef<int> Mask = Shuf->getMask();
21049 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21050 SDValue Op0 = Shuf->getOperand(0);
21051 SDValue Op1 = Shuf->getOperand(1);
21052 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21053 if (ShufOp0Index == -1) {
21054 // Commute mask and check again.
21055 ShuffleVectorSDNode::commuteMask(CommutedMask);
21056 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21057 if (ShufOp0Index == -1)
21058 return SDValue();
21059 // Commute operands to match the commuted shuffle mask.
21060 std::swap(Op0, Op1);
21061 Mask = CommutedMask;
21062 }
21063
21064 // The shuffle inserts exactly one element from operand 0 into operand 1.
21065 // Now see if we can access that element as a scalar via a real insert element
21066 // instruction.
21067 // TODO: We can try harder to locate the element as a scalar. Examples: it
21068 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21069 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&((void)0)
21070 "Shuffle mask value must be from operand 0")((void)0);
21071 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21072 return SDValue();
21073
21074 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21075 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21076 return SDValue();
21077
21078 // There's an existing insertelement with constant insertion index, so we
21079 // don't need to check the legality/profitability of a replacement operation
21080 // that differs at most in the constant value. The target should be able to
21081 // lower any of those in a similar way. If not, legalization will expand this
21082 // to a scalar-to-vector plus shuffle.
21083 //
21084 // Note that the shuffle may move the scalar from the position that the insert
21085 // element used. Therefore, our new insert element occurs at the shuffle's
21086 // mask index value, not the insert's index value.
21087 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21088 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21089 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21090 Op1, Op0.getOperand(1), NewInsIndex);
21091}
21092
21093/// If we have a unary shuffle of a shuffle, see if it can be folded away
21094/// completely. This has the potential to lose undef knowledge because the first
21095/// shuffle may not have an undef mask element where the second one does. So
21096/// only call this after doing simplifications based on demanded elements.
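/// e.g. shuf (shuf X, Y, <0,0,0,0>), undef, <2,3,0,1> --> shuf X, Y, <0,0,0,0>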
21097static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21098 // shuf (shuf0 X, Y, Mask0), undef, Mask
21099 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21100 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21101 return SDValue();
21102
21103 ArrayRef<int> Mask = Shuf->getMask();
21104 ArrayRef<int> Mask0 = Shuf0->getMask();
21105 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21106 // Ignore undef elements.
21107 if (Mask[i] == -1)
21108 continue;
21109 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value")((void)0);
21110
21111 // Is the element of the shuffle operand chosen by this shuffle the same as
21112 // the element chosen by the shuffle operand itself?
21113 if (Mask0[Mask[i]] != Mask0[i])
21114 return SDValue();
21115 }
21116 // Every element of this shuffle is identical to the result of the previous
21117 // shuffle, so we can replace this value.
21118 return Shuf->getOperand(0);
21119}
21120
21121SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21122 EVT VT = N->getValueType(0);
21123 unsigned NumElts = VT.getVectorNumElements();
21124
21125 SDValue N0 = N->getOperand(0);
21126 SDValue N1 = N->getOperand(1);
21127
21128 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG")((void)0);
21129
21130 // Canonicalize shuffle undef, undef -> undef
21131 if (N0.isUndef() && N1.isUndef())
21132 return DAG.getUNDEF(VT);
21133
21134 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21135
21136 // Canonicalize shuffle v, v -> v, undef
21137 if (N0 == N1) {
21138 SmallVector<int, 8> NewMask;
21139 for (unsigned i = 0; i != NumElts; ++i) {
21140 int Idx = SVN->getMaskElt(i);
21141 if (Idx >= (int)NumElts) Idx -= NumElts;
21142 NewMask.push_back(Idx);
21143 }
21144 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21145 }
21146
21147 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21148 if (N0.isUndef())
21149 return DAG.getCommutedVectorShuffle(*SVN);
21150
21151 // Remove references to rhs if it is undef
21152 if (N1.isUndef()) {
21153 bool Changed = false;
21154 SmallVector<int, 8> NewMask;
21155 for (unsigned i = 0; i != NumElts; ++i) {
21156 int Idx = SVN->getMaskElt(i);
21157 if (Idx >= (int)NumElts) {
21158 Idx = -1;
21159 Changed = true;
21160 }
21161 NewMask.push_back(Idx);
21162 }
21163 if (Changed)
21164 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21165 }
21166
21167 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21168 return InsElt;
21169
21170 // A shuffle of a single vector that is a splatted value can always be folded.
21171 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21172 return V;
21173
21174 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21175 return V;
21176
21177 // If it is a splat, check if the argument vector is another splat or a
21178 // build_vector.
21179 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21180 int SplatIndex = SVN->getSplatIndex();
21181 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21182 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21183 // splat (vector_bo L, R), Index -->
21184 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21185 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21186 SDLoc DL(N);
21187 EVT EltVT = VT.getScalarType();
21188 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21189 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21190 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21191 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21192 N0.getNode()->getFlags());
21193 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21194 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21195 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21196 }
21197
21198 // If this is a bit convert that changes the element type of the vector but
21199 // not the number of vector elements, look through it. Be careful not to
21200 // look through conversions that change things like v4f32 to v2f64.
21201 SDNode *V = N0.getNode();
21202 if (V->getOpcode() == ISD::BITCAST) {
21203 SDValue ConvInput = V->getOperand(0);
21204 if (ConvInput.getValueType().isVector() &&
21205 ConvInput.getValueType().getVectorNumElements() == NumElts)
21206 V = ConvInput.getNode();
21207 }
21208
21209 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21210 assert(V->getNumOperands() == NumElts &&
21211 "BUILD_VECTOR has wrong number of operands");
21212 SDValue Base;
21213 bool AllSame = true;
21214 for (unsigned i = 0; i != NumElts; ++i) {
21215 if (!V->getOperand(i).isUndef()) {
21216 Base = V->getOperand(i);
21217 break;
21218 }
21219 }
21220 // Splat of <u, u, u, u>, return <u, u, u, u>
21221 if (!Base.getNode())
21222 return N0;
21223 for (unsigned i = 0; i != NumElts; ++i) {
21224 if (V->getOperand(i) != Base) {
21225 AllSame = false;
21226 break;
21227 }
21228 }
21229 // Splat of <x, x, x, x>, return <x, x, x, x>
21230 if (AllSame)
21231 return N0;
21232
21233 // Canonicalize any other splat as a build_vector.
21234 SDValue Splatted = V->getOperand(SplatIndex);
21235 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21236 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21237
21238 // We may have jumped through bitcasts, so the type of the
21239 // BUILD_VECTOR may not match the type of the shuffle.
21240 if (V->getValueType(0) != VT)
21241 NewBV = DAG.getBitcast(VT, NewBV);
21242 return NewBV;
21243 }
21244 }
21245
21246 // Simplify source operands based on shuffle mask.
21247 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21248 return SDValue(N, 0);
21249
21250 // This is intentionally placed after demanded elements simplification because
21251 // it could eliminate knowledge of undef elements created by this shuffle.
21252 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21253 return ShufOp;
21254
21255 // Match shuffles that can be converted to any_vector_extend_in_reg.
21256 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21257 return V;
21258
21259 // Combine "truncate_vector_in_reg" style shuffles.
21260 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21261 return V;
21262
21263 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21264 Level < AfterLegalizeVectorOps &&
21265 (N1.isUndef() ||
21266 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21267 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21268 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21269 return V;
21270 }
21271
21272 // A shuffle of a concat of the same narrow vector can be reduced to use
21273 // only low-half elements of a concat with undef:
21274 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
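// For example (illustrative masks, 2-element X): shuf (concat X, X), undef,
// <0,3,1,2> --> shuf (concat X, undef), undef, <0,1,1,0>; indices into the
// duplicated upper half are remapped into the lower half.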
21275 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21276 N0.getNumOperands() == 2 &&
21277 N0.getOperand(0) == N0.getOperand(1)) {
21278 int HalfNumElts = (int)NumElts / 2;
21279 SmallVector<int, 8> NewMask;
21280 for (unsigned i = 0; i != NumElts; ++i) {
21281 int Idx = SVN->getMaskElt(i);
21282 if (Idx >= HalfNumElts) {
21283 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21284 Idx -= HalfNumElts;
21285 }
21286 NewMask.push_back(Idx);
21287 }
21288 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21289 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21290 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21291 N0.getOperand(0), UndefVec);
21292 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21293 }
21294 }
21295
21296 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21297 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21298 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21299 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21300 return Res;
21301
21302 // If this shuffle only has a single input that is a bitcasted shuffle,
21303 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21304 // back to their original types.
21305 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21306 N1.isUndef() && Level < AfterLegalizeVectorOps &&
21307 TLI.isTypeLegal(VT)) {
21308
21309 SDValue BC0 = peekThroughOneUseBitcasts(N0);
21310 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21311 EVT SVT = VT.getScalarType();
21312 EVT InnerVT = BC0->getValueType(0);
21313 EVT InnerSVT = InnerVT.getScalarType();
21314
21315 // Determine which shuffle works with the smaller scalar type.
21316 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21317 EVT ScaleSVT = ScaleVT.getScalarType();
21318
21319 if (TLI.isTypeLegal(ScaleVT) &&
21320 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21321 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21322 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21323 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21324
21325 // Scale the shuffle masks to the smaller scalar type.
21326 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21327 SmallVector<int, 8> InnerMask;
21328 SmallVector<int, 8> OuterMask;
21329 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21330 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21331
21332 // Merge the shuffle masks.
21333 SmallVector<int, 8> NewMask;
21334 for (int M : OuterMask)
21335 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21336
21337 // Test for shuffle mask legality over both commutations.
21338 SDValue SV0 = BC0->getOperand(0);
21339 SDValue SV1 = BC0->getOperand(1);
21340 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21341 if (!LegalMask) {
21342 std::swap(SV0, SV1);
21343 ShuffleVectorSDNode::commuteMask(NewMask);
21344 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21345 }
21346
21347 if (LegalMask) {
21348 SV0 = DAG.getBitcast(ScaleVT, SV0);
21349 SV1 = DAG.getBitcast(ScaleVT, SV1);
21350 return DAG.getBitcast(
21351 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21352 }
21353 }
21354 }
21355 }
21356
21357 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21358 // operand, and SV1 as the second operand.
21359 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21360 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
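// Worked example (masks chosen for illustration, 4 elements, Commute = false):
// merging shuffle(shuffle(A, B, <0,4,1,5>), B, <0,2,4,6>) yields SV0 = A,
// SV1 = B and Mask = <0,1,4,6>, i.e. a single shuffle(A, B, <0,1,4,6>).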
21361 auto MergeInnerShuffle =
21362 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21363 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21364 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21365 SmallVectorImpl<int> &Mask) -> bool {
21366 // Don't try to fold splats; they're likely to simplify somehow, or they
21367 // might be free.
21368 if (OtherSVN->isSplat())
21369 return false;
21370
21371 SV0 = SV1 = SDValue();
21372 Mask.clear();
21373
21374 for (unsigned i = 0; i != NumElts; ++i) {
21375 int Idx = SVN->getMaskElt(i);
21376 if (Idx < 0) {
21377 // Propagate Undef.
21378 Mask.push_back(Idx);
21379 continue;
21380 }
21381
21382 if (Commute)
21383 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21384
21385 SDValue CurrentVec;
21386 if (Idx < (int)NumElts) {
21387 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21388 // shuffle mask to identify which vector is actually referenced.
21389 Idx = OtherSVN->getMaskElt(Idx);
21390 if (Idx < 0) {
21391 // Propagate Undef.
21392 Mask.push_back(Idx);
21393 continue;
21394 }
21395 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21396 : OtherSVN->getOperand(1);
21397 } else {
21398 // This shuffle index references an element within N1.
21399 CurrentVec = N1;
21400 }
21401
21402 // Simple case where 'CurrentVec' is UNDEF.
21403 if (CurrentVec.isUndef()) {
21404 Mask.push_back(-1);
21405 continue;
21406 }
21407
21408 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21409 // will be the first or second operand of the combined shuffle.
21410 Idx = Idx % NumElts;
21411 if (!SV0.getNode() || SV0 == CurrentVec) {
21412 // Ok. CurrentVec is the left hand side.
21413 // Update the mask accordingly.
21414 SV0 = CurrentVec;
21415 Mask.push_back(Idx);
21416 continue;
21417 }
21418 if (!SV1.getNode() || SV1 == CurrentVec) {
21419 // Ok. CurrentVec is the right hand side.
21420 // Update the mask accordingly.
21421 SV1 = CurrentVec;
21422 Mask.push_back(Idx + NumElts);
21423 continue;
21424 }
21425
21426 // Last chance - see if the vector is another shuffle and if it
21427 // uses one of the existing candidate shuffle ops.
21428 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21429 int InnerIdx = CurrentSVN->getMaskElt(Idx);
21430 if (InnerIdx < 0) {
21431 Mask.push_back(-1);
21432 continue;
21433 }
21434 SDValue InnerVec = (InnerIdx < (int)NumElts)
21435 ? CurrentSVN->getOperand(0)
21436 : CurrentSVN->getOperand(1);
21437 if (InnerVec.isUndef()) {
21438 Mask.push_back(-1);
21439 continue;
21440 }
21441 InnerIdx %= NumElts;
21442 if (InnerVec == SV0) {
21443 Mask.push_back(InnerIdx);
21444 continue;
21445 }
21446 if (InnerVec == SV1) {
21447 Mask.push_back(InnerIdx + NumElts);
21448 continue;
21449 }
21450 }
21451
21452 // Bail out if we cannot convert the shuffle pair into a single shuffle.
21453 return false;
21454 }
21455
21456 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21457 return true;
21458
21459 // Avoid introducing shuffles with illegal mask.
21460 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21461 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21462 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21463 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21464 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21465 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21466 if (TLI.isShuffleMaskLegal(Mask, VT))
21467 return true;
21468
21469 std::swap(SV0, SV1);
21470 ShuffleVectorSDNode::commuteMask(Mask);
21471 return TLI.isShuffleMaskLegal(Mask, VT);
21472 };
21473
21474 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21475 // Canonicalize shuffles according to rules:
21476 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21477 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21478 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21479 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21480 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21481 // The incoming shuffle must be of the same type as the result of the
21482 // current shuffle.
21483 assert(N1->getOperand(0).getValueType() == VT &&
21484 "Shuffle types don't match");
21485
21486 SDValue SV0 = N1->getOperand(0);
21487 SDValue SV1 = N1->getOperand(1);
21488 bool HasSameOp0 = N0 == SV0;
21489 bool IsSV1Undef = SV1.isUndef();
21490 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21491 // Commute the operands of this shuffle so merging below will trigger.
21492 return DAG.getCommutedVectorShuffle(*SVN);
21493 }
21494
21495 // Canonicalize splat shuffles to the RHS to improve merging below.
21496 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21497 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21498 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21499 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21500 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21501 return DAG.getCommutedVectorShuffle(*SVN);
21502 }
21503
21504 // Try to fold according to rules:
21505 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21506 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21507 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21508 // Don't try to fold shuffles with illegal type.
21509 // Only fold if this shuffle is the only user of the other shuffle.
21510 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21511 for (int i = 0; i != 2; ++i) {
21512 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21513 N->isOnlyUserOf(N->getOperand(i).getNode())) {
21514 // The incoming shuffle must be of the same type as the result of the
21515 // current shuffle.
21516 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21517 assert(OtherSV->getOperand(0).getValueType() == VT &&
21518 "Shuffle types don't match");
21519
21520 SDValue SV0, SV1;
21521 SmallVector<int, 4> Mask;
21522 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21523 SV0, SV1, Mask)) {
21524 // Check if all indices in Mask are Undef. If so, propagate Undef.
21525 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21526 return DAG.getUNDEF(VT);
21527
21528 return DAG.getVectorShuffle(VT, SDLoc(N),
21529 SV0 ? SV0 : DAG.getUNDEF(VT),
21530 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21531 }
21532 }
21533 }
21534
21535 // Merge shuffles through binops if we are able to merge them with at least
21536 // one other shuffle.
21537 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21538 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21539 unsigned SrcOpcode = N0.getOpcode();
21540 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21541 (N1.isUndef() ||
21542 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21543 // Get binop source ops, or just pass on the undef.
21544 SDValue Op00 = N0.getOperand(0);
21545 SDValue Op01 = N0.getOperand(1);
21546 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21547 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21548 // TODO: We might be able to relax the VT check but we don't currently
21549 // have any isBinOp() that has different result/ops VTs so play safe until
21550 // we have test coverage.
21551 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21552 Op01.getValueType() == VT && Op11.getValueType() == VT &&
21553 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21554 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21555 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21556 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21557 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21558 SmallVectorImpl<int> &Mask, bool LeftOp,
21559 bool Commute) {
21560 SDValue InnerN = Commute ? N1 : N0;
21561 SDValue Op0 = LeftOp ? Op00 : Op01;
21562 SDValue Op1 = LeftOp ? Op10 : Op11;
21563 if (Commute)
21564 std::swap(Op0, Op1);
21565 // Only accept the merged shuffle if we don't introduce undef elements,
21566 // or the inner shuffle already contained undef elements.
21567 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21568 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21569 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21570 Mask) &&
21571 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21572 llvm::none_of(Mask, [](int M) { return M < 0; }));
21573 };
21574
21575 // Ensure we don't increase the number of shuffles - we must merge a
21576 // shuffle from at least one of the LHS and RHS ops.
21577 bool MergedLeft = false;
21578 SDValue LeftSV0, LeftSV1;
21579 SmallVector<int, 4> LeftMask;
21580 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21581 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21582 MergedLeft = true;
21583 } else {
21584 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21585 LeftSV0 = Op00, LeftSV1 = Op10;
21586 }
21587
21588 bool MergedRight = false;
21589 SDValue RightSV0, RightSV1;
21590 SmallVector<int, 4> RightMask;
21591 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21592 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21593 MergedRight = true;
21594 } else {
21595 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21596 RightSV0 = Op01, RightSV1 = Op11;
21597 }
21598
21599 if (MergedLeft || MergedRight) {
21600 SDLoc DL(N);
21601 SDValue LHS = DAG.getVectorShuffle(
21602 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21603 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21604 SDValue RHS = DAG.getVectorShuffle(
21605 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21606 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21607 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21608 }
21609 }
21610 }
21611 }
21612
21613 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21614 return V;
21615
21616 return SDValue();
21617}
21618
21619SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21620 SDValue InVal = N->getOperand(0);
21621 EVT VT = N->getValueType(0);
21622
21623 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21624 // with a VECTOR_SHUFFLE and possible truncate.
21625 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21626 VT.isFixedLengthVector() &&
21627 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21628 SDValue InVec = InVal->getOperand(0);
21629 SDValue EltNo = InVal->getOperand(1);
21630 auto InVecT = InVec.getValueType();
21631 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21632 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21633 int Elt = C0->getZExtValue();
21634 NewMask[0] = Elt;
21635 // If we have an implicit truncate, do the truncate here as long as it's legal;
21636 // if it's not legal, the truncation is skipped.
21637 if (VT.getScalarType() != InVal.getValueType() &&
21638 InVal.getValueType().isScalarInteger() &&
21639 isTypeLegal(VT.getScalarType())) {
21640 SDValue Val =
21641 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21642 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21643 }
21644 if (VT.getScalarType() == InVecT.getScalarType() &&
21645 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21646 SDValue LegalShuffle =
21647 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21648 DAG.getUNDEF(InVecT), NewMask, DAG);
21649 if (LegalShuffle) {
21650 // If the initial vector is the correct size this shuffle is a
21651 // valid result.
21652 if (VT == InVecT)
21653 return LegalShuffle;
21654 // If not we must truncate the vector.
21655 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21656 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21657 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21658 InVecT.getVectorElementType(),
21659 VT.getVectorNumElements());
21660 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21661 LegalShuffle, ZeroIdx);
21662 }
21663 }
21664 }
21665 }
21666 }
21667
21668 return SDValue();
21669}
21670
21671SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21672 EVT VT = N->getValueType(0);
21673 SDValue N0 = N->getOperand(0);
21674 SDValue N1 = N->getOperand(1);
21675 SDValue N2 = N->getOperand(2);
21676 uint64_t InsIdx = N->getConstantOperandVal(2);
21677
21678 // If inserting an UNDEF, just return the original vector.
21679 if (N1.isUndef())
21680 return N0;
21681
21682 // If this is an insert of an extracted vector into an undef vector, we can
21683 // just use the input to the extract.
21684 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21685 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21686 return N1.getOperand(0);
21687
21688 // If we are inserting a bitcast value into an undef, with the same
21689 // number of elements, just use the bitcast input of the extract.
21690 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21691 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21692 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21693 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21694 N1.getOperand(0).getOperand(1) == N2 &&
21695 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21696 VT.getVectorElementCount() &&
21697 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21698 VT.getSizeInBits()) {
21699 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21700 }
21701
21702 // If both N0 and N1 are bitcast values on which insert_subvector
21703 // would make sense, pull the bitcast through.
21704 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21705 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21706 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21707 SDValue CN0 = N0.getOperand(0);
21708 SDValue CN1 = N1.getOperand(0);
21709 EVT CN0VT = CN0.getValueType();
21710 EVT CN1VT = CN1.getValueType();
21711 if (CN0VT.isVector() && CN1VT.isVector() &&
21712 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21713 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21714 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21715 CN0.getValueType(), CN0, CN1, N2);
21716 return DAG.getBitcast(VT, NewINSERT);
21717 }
21718 }
21719
21720 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21721 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21722 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21723 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21724 N0.getOperand(1).getValueType() == N1.getValueType() &&
21725 N0.getOperand(2) == N2)
21726 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21727 N1, N2);
21728
21729 // Eliminate an intermediate insert into an undef vector:
21730 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21731 // insert_subvector undef, X, N2
21732 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21733 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21734 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21735 N1.getOperand(1), N2);
21736
21737 // Push subvector bitcasts to the output, adjusting the index as we go.
21738 // insert_subvector(bitcast(v), bitcast(s), c1)
21739 // -> bitcast(insert_subvector(v, s, c2))
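// Illustration (hypothetical types, assuming the base is undef or also built
// from i32 elements): inserting a v2i64 bitcast of a v4i32 value into a v4i64
// vector at index 2 becomes inserting the v4i32 value into a v8i32 bitcast of
// the base at index 4, followed by a bitcast back to v4i64.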
21740 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21741 N1.getOpcode() == ISD::BITCAST) {
21742 SDValue N0Src = peekThroughBitcasts(N0);
21743 SDValue N1Src = peekThroughBitcasts(N1);
21744 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21745 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21746 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21747 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21748 EVT NewVT;
21749 SDLoc DL(N);
21750 SDValue NewIdx;
21751 LLVMContext &Ctx = *DAG.getContext();
21752 ElementCount NumElts = VT.getVectorElementCount();
21753 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21754 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21755 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21756 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21757 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21758 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21759 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21760 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21761 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21762 NumElts.divideCoefficientBy(Scale));
21763 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21764 }
21765 }
21766 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21767 SDValue Res = DAG.getBitcast(NewVT, N0Src);
21768 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21769 return DAG.getBitcast(VT, Res);
21770 }
21771 }
21772 }
21773
21774 // Canonicalize insert_subvector dag nodes.
21775 // Example:
21776 // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
21777 // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), when Idx1 < Idx0
21778 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21779 N1.getValueType() == N0.getOperand(1).getValueType()) {
21780 unsigned OtherIdx = N0.getConstantOperandVal(2);
21781 if (InsIdx < OtherIdx) {
21782 // Swap nodes.
21783 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21784 N0.getOperand(0), N1, N2);
21785 AddToWorklist(NewOp.getNode());
21786 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21787 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21788 }
21789 }
21790
21791 // If the input vector is a concatenation, and the insert replaces
21792 // one of the pieces, we can optimize into a single concat_vectors.
21793 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21794 N0.getOperand(0).getValueType() == N1.getValueType() &&
21795 N0.getOperand(0).getValueType().isScalableVector() ==
21796 N1.getValueType().isScalableVector()) {
21797 unsigned Factor = N1.getValueType().getVectorMinNumElements();
21798 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21799 Ops[InsIdx / Factor] = N1;
21800 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21801 }
21802
21803 // Simplify source operands based on insertion.
21804 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21805 return SDValue(N, 0);
21806
21807 return SDValue();
21808}
21809
21810SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21811 SDValue N0 = N->getOperand(0);
21812
21813 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21814 if (N0->getOpcode() == ISD::FP16_TO_FP)
21815 return N0->getOperand(0);
21816
21817 return SDValue();
21818}
21819
21820SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21821 SDValue N0 = N->getOperand(0);
21822
21823 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21824 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21825 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21826 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21827 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21828 N0.getOperand(0));
21829 }
21830 }
21831
21832 return SDValue();
21833}
21834
21835SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21836 SDValue N0 = N->getOperand(0);
21837 EVT VT = N0.getValueType();
21838 unsigned Opcode = N->getOpcode();
21839
21840 // VECREDUCE over 1-element vector is just an extract.
21841 if (VT.getVectorElementCount().isScalar()) {
21842 SDLoc dl(N);
21843 SDValue Res =
21844 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21845 DAG.getVectorIdxConstant(0, dl));
21846 if (Res.getValueType() != N->getValueType(0))
21847 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21848 return Res;
21849 }
21850
21851 // On a boolean vector an and/or reduction is the same as a umin/umax
21852 // reduction. Convert them if the latter is legal while the former isn't.
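// This holds because each lane here is known to be either 0 or all-ones: the
// unsigned minimum of such lanes equals their AND, and the unsigned maximum
// equals their OR.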
21853 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21854 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21855 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21856 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21857 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21858 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21859 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21860 }
21861
21862 return SDValue();
21863}
21864
21865 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
21866/// with the destination vector and a zero vector.
21867/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21868/// vector_shuffle V, Zero, <0, 4, 2, 4>
21869SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21870 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
21871
21872 EVT VT = N->getValueType(0);
21873 SDValue LHS = N->getOperand(0);
21874 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21875 SDLoc DL(N);
21876
21877 // Make sure we're not running after operation legalization where it
21878 // may have custom lowered the vector shuffles.
21879 if (LegalOperations)
21880 return SDValue();
21881
21882 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21883 return SDValue();
21884
21885 EVT RVT = RHS.getValueType();
21886 unsigned NumElts = RHS.getNumOperands();
21887
21888 // Attempt to create a valid clear mask, splitting the mask into
21889 // sub elements and checking to see if each is
21890 // all zeros or all ones - suitable for shuffle masking.
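// Illustration (hypothetical constant, little-endian): for a v2i32 AND with
// <0x0000FFFF, 0xFFFFFFFF> and a 16-bit split, the sub-elements are all-ones,
// zero, all-ones, all-ones, giving clear-mask indices <0, 5, 2, 3> over
// (bitcast v4i16 LHS, zero).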
21891 auto BuildClearMask = [&](int Split) {
21892 int NumSubElts = NumElts * Split;
21893 int NumSubBits = RVT.getScalarSizeInBits() / Split;
21894
21895 SmallVector<int, 8> Indices;
21896 for (int i = 0; i != NumSubElts; ++i) {
21897 int EltIdx = i / Split;
21898 int SubIdx = i % Split;
21899 SDValue Elt = RHS.getOperand(EltIdx);
21900 // X & undef --> 0 (not undef). So this lane must be converted to choose
21901 // from the zero constant vector (same as if the element had all 0-bits).
21902 if (Elt.isUndef()) {
21903 Indices.push_back(i + NumSubElts);
21904 continue;
21905 }
21906
21907 APInt Bits;
21908 if (isa<ConstantSDNode>(Elt))
21909 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21910 else if (isa<ConstantFPSDNode>(Elt))
21911 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21912 else
21913 return SDValue();
21914
21915 // Extract the sub element from the constant bit mask.
21916 if (DAG.getDataLayout().isBigEndian())
21917 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21918 else
21919 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21920
21921 if (Bits.isAllOnesValue())
21922 Indices.push_back(i);
21923 else if (Bits == 0)
21924 Indices.push_back(i + NumSubElts);
21925 else
21926 return SDValue();
21927 }
21928
21929 // Let's see if the target supports this vector_shuffle.
21930 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21931 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21932 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21933 return SDValue();
21934
21935 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21936 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21937 DAG.getBitcast(ClearVT, LHS),
21938 Zero, Indices));
21939 };
21940
21941 // Determine maximum split level (byte level masking).
21942 int MaxSplit = 1;
21943 if (RVT.getScalarSizeInBits() % 8 == 0)
21944 MaxSplit = RVT.getScalarSizeInBits() / 8;
21945
21946 for (int Split = 1; Split <= MaxSplit; ++Split)
21947 if (RVT.getScalarSizeInBits() % Split == 0)
21948 if (SDValue S = BuildClearMask(Split))
21949 return S;
21950
21951 return SDValue();
21952}
21953
21954/// If a vector binop is performed on splat values, it may be profitable to
21955/// extract, scalarize, and insert/splat.
21956static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21957 SDValue N0 = N->getOperand(0);
21958 SDValue N1 = N->getOperand(1);
21959 unsigned Opcode = N->getOpcode();
21960 EVT VT = N->getValueType(0);
21961 EVT EltVT = VT.getVectorElementType();
21962 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21963
21964 // TODO: Remove/replace the extract cost check? If the elements are available
21965 // as scalars, then there may be no extract cost. Should we ask if
21966 // inserting a scalar back into a vector is cheap instead?
21967 int Index0, Index1;
21968 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21969 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21970 if (!Src0 || !Src1 || Index0 != Index1 ||
21971 Src0.getValueType().getVectorElementType() != EltVT ||
21972 Src1.getValueType().getVectorElementType() != EltVT ||
21973 !TLI.isExtractVecEltCheap(VT, Index0) ||
21974 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
21975 return SDValue();
21976
21977 SDLoc DL(N);
21978 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21979 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21980 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21981 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21982
21983 // If all lanes but 1 are undefined, no need to splat the scalar result.
21984 // TODO: Keep track of undefs and use that info in the general case.
21985 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21986 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21987 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21988 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21989 // build_vec ..undef, (bo X, Y), undef...
21990 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21991 Ops[Index0] = ScalarBO;
21992 return DAG.getBuildVector(VT, DL, Ops);
21993 }
21994
21995 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21996 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21997 return DAG.getBuildVector(VT, DL, Ops);
21998}
21999
22000/// Visit a binary vector operation, like ADD.
22001SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
22002 assert(N->getValueType(0).isVector() &&
22003 "SimplifyVBinOp only works on vectors!");
22004
22005 SDValue LHS = N->getOperand(0);
22006 SDValue RHS = N->getOperand(1);
22007 SDValue Ops[] = {LHS, RHS};
22008 EVT VT = N->getValueType(0);
22009 unsigned Opcode = N->getOpcode();
22010 SDNodeFlags Flags = N->getFlags();
22011
22012 // See if we can constant fold the vector operation.
22013 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
22014 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
22015 return Fold;
22016
22017 // Move unary shuffles with identical masks after a vector binop:
22018 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22019 // --> shuffle (VBinOp A, B), Undef, Mask
22020 // This does not require type legality checks because we are creating the
22021 // same types of operations that are in the original sequence. We do have to
22022 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22023 // though. This code is adapted from the identical transform in instcombine.
22024 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22025 Opcode != ISD::UREM && Opcode != ISD::SREM &&
22026 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22027 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22028 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22029 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22030 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22031 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22032 SDLoc DL(N);
22033 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22034 RHS.getOperand(0), Flags);
22035 SDValue UndefV = LHS.getOperand(1);
22036 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22037 }
22038
22039 // Try to sink a splat shuffle after a binop with a uniform constant.
22040 // This is limited to cases where neither the shuffle nor the constant have
22041 // undefined elements because that could be poison-unsafe or inhibit
22042 // demanded elements analysis. It is further limited to not change a splat
22043 // of an inserted scalar because that may be optimized better by
22044 // load-folding or other target-specific behaviors.
22045 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22046 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22047 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22048 // binop (splat X), (splat C) --> splat (binop X, C)
22049 SDLoc DL(N);
22050 SDValue X = Shuf0->getOperand(0);
22051 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22052 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22053 Shuf0->getMask());
22054 }
22055 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22056 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22057 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22058 // binop (splat C), (splat X) --> splat (binop C, X)
22059 SDLoc DL(N);
22060 SDValue X = Shuf1->getOperand(0);
22061 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22062 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22063 Shuf1->getMask());
22064 }
22065 }
22066
22067 // The following pattern is likely to emerge with vector reduction ops. Moving
22068 // the binary operation ahead of insertion may allow using a narrower vector
22069 // instruction that has better performance than the wide version of the op:
22070 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22071 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22072 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22073 LHS.getOperand(2) == RHS.getOperand(2) &&
22074 (LHS.hasOneUse() || RHS.hasOneUse())) {
22075 SDValue X = LHS.getOperand(1);
22076 SDValue Y = RHS.getOperand(1);
22077 SDValue Z = LHS.getOperand(2);
22078 EVT NarrowVT = X.getValueType();
22079 if (NarrowVT == Y.getValueType() &&
22080 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22081 LegalOperations)) {
22082 // (binop undef, undef) may not return undef, so compute that result.
22083 SDLoc DL(N);
22084 SDValue VecC =
22085 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22086 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22087 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22088 }
22089 }
22090
22091 // Make sure all but the first op are undef or constant.
22092 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22093 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22094 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22095 return Op.isUndef() ||
22096 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22097 });
22098 };
22099
22100 // The following pattern is likely to emerge with vector reduction ops. Moving
22101 // the binary operation ahead of the concat may allow using a narrower vector
22102 // instruction that has better performance than the wide version of the op:
22103 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22104 // concat (VBinOp X, Y), VecC
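// For example (illustrative constants): add (concat X, <1,1>), (concat Y, <2,2>)
// --> concat (add X, Y), <3,3>; the constant halves fold away in the
// per-operand adds built below.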
22105 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22106 (LHS.hasOneUse() || RHS.hasOneUse())) {
22107 EVT NarrowVT = LHS.getOperand(0).getValueType();
22108 if (NarrowVT == RHS.getOperand(0).getValueType() &&
22109 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22110 SDLoc DL(N);
22111 unsigned NumOperands = LHS.getNumOperands();
22112 SmallVector<SDValue, 4> ConcatOps;
22113 for (unsigned i = 0; i != NumOperands; ++i) {
22114 // This constant-folds for operands 1 and up.
22115 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22116 RHS.getOperand(i)));
22117 }
22118
22119 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22120 }
22121 }
22122
22123 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22124 return V;
22125
22126 return SDValue();
22127}
22128
22129SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22130 SDValue N2) {
22131 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
22132
22133 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22134 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22135
22136 // If we got a simplified select_cc node back from SimplifySelectCC, then
22137 // break it down into a new SETCC node, and a new SELECT node, and then return
22138 // the SELECT node, since we were called with a SELECT node.
22139 if (SCC.getNode()) {
22140 // Check to see if we got a select_cc back (to turn into setcc/select).
22141 // Otherwise, just return whatever node we got back, like fabs.
22142 if (SCC.getOpcode() == ISD::SELECT_CC) {
22143 const SDNodeFlags Flags = N0.getNode()->getFlags();
22144 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22145 N0.getValueType(),
22146 SCC.getOperand(0), SCC.getOperand(1),
22147 SCC.getOperand(4), Flags);
22148 AddToWorklist(SETCC.getNode());
22149 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22150 SCC.getOperand(2), SCC.getOperand(3));
22151 SelectNode->setFlags(Flags);
22152 return SelectNode;
22153 }
22154
22155 return SCC;
22156 }
22157 return SDValue();
22158}
22159
22160/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22161/// being selected between, see if we can simplify the select. Callers of this
22162/// should assume that TheSelect is deleted if this returns true. As such, they
22163/// should return the appropriate thing (e.g. the node) back to the top-level of
22164/// the DAG combiner loop to avoid it being looked at.
22165bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22166 SDValue RHS) {
22167 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22168 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22169 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22170 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22171 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22172 SDValue Sqrt = RHS;
22173 ISD::CondCode CC;
22174 SDValue CmpLHS;
22175 const ConstantFPSDNode *Zero = nullptr;
22176
22177 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22178 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22179 CmpLHS = TheSelect->getOperand(0);
22180 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22181 } else {
22182 // SELECT or VSELECT
22183 SDValue Cmp = TheSelect->getOperand(0);
22184 if (Cmp.getOpcode() == ISD::SETCC) {
22185 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22186 CmpLHS = Cmp.getOperand(0);
22187 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22188 }
22189 }
22190 if (Zero && Zero->isZero() &&
22191 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22192 CC == ISD::SETULT || CC == ISD::SETLT)) {
22193 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22194 CombineTo(TheSelect, Sqrt);
22195 return true;
22196 }
22197 }
22198 }
22199 // Cannot simplify select with vector condition
22200 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22201
22202 // If this is a select from two identical things, try to pull the operation
22203 // through the select.
22204 if (LHS.getOpcode() != RHS.getOpcode() ||
22205 !LHS.hasOneUse() || !RHS.hasOneUse())
22206 return false;
22207
22208 // If this is a load and the token chain is identical, replace the select
22209 // of two loads with a load through a select of the address to load from.
22210 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22211 // constants have been dropped into the constant pool.
22212 if (LHS.getOpcode() == ISD::LOAD) {
22213 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22214 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22215
22216 // Token chains must be identical.
22217 if (LHS.getOperand(0) != RHS.getOperand(0) ||
22218 // Do not let this transformation reduce the number of volatile loads.
22219 // Be conservative for atomics for the moment
22220 // TODO: This does appear to be legal for unordered atomics (see D66309)
22221 !LLD->isSimple() || !RLD->isSimple() ||
22222 // FIXME: If either is a pre/post inc/dec load,
22223 // we'd need to split out the address adjustment.
22224 LLD->isIndexed() || RLD->isIndexed() ||
22225 // If this is an EXTLOAD, the VT's must match.
22226 LLD->getMemoryVT() != RLD->getMemoryVT() ||
22227 // If this is an EXTLOAD, the kind of extension must match.
22228 (LLD->getExtensionType() != RLD->getExtensionType() &&
22229 // The only exception is if one of the extensions is anyext.
22230 LLD->getExtensionType() != ISD::EXTLOAD &&
22231 RLD->getExtensionType() != ISD::EXTLOAD) ||
22232 // FIXME: this discards src value information. This is
22233 // over-conservative. It would be beneficial to be able to remember
22234 // both potential memory locations. Since we are discarding
22235 // src value info, don't do the transformation if the memory
22236 // locations are not in the default address space.
22237 LLD->getPointerInfo().getAddrSpace() != 0 ||
22238 RLD->getPointerInfo().getAddrSpace() != 0 ||
22239 // We can't produce a CMOV of a TargetFrameIndex since we won't
22240 // generate the address generation required.
22241 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22242 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22243 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22244 LLD->getBasePtr().getValueType()))
22245 return false;
22246
22247 // The loads must not depend on one another.
22248 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22249 return false;
22250
22251 // Check that the select condition doesn't reach either load. If so,
22252 // folding this will induce a cycle into the DAG. If not, this is safe to
22253 // xform, so create a select of the addresses.
22254
22255 SmallPtrSet<const SDNode *, 32> Visited;
22256 SmallVector<const SDNode *, 16> Worklist;
22257
22258 // Always fail if LLD and RLD are not independent. TheSelect is a
22259 // predecessor to all Nodes in question so we need not search past it.
22260
22261 Visited.insert(TheSelect);
22262 Worklist.push_back(LLD);
22263 Worklist.push_back(RLD);
22264
22265 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22266 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22267 return false;
22268
22269 SDValue Addr;
22270 if (TheSelect->getOpcode() == ISD::SELECT) {
22271 // We cannot do this optimization if any pair of {RLD, LLD} is a
22272 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22273 // Loads, we only need to check if CondNode is a successor to one of the
22274 // loads. We can further avoid this if there's no use of their chain
22275 // value.
22276 SDNode *CondNode = TheSelect->getOperand(0).getNode();
22277 Worklist.push_back(CondNode);
22278
22279 if ((LLD->hasAnyUseOfValue(1) &&
22280 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22281 (RLD->hasAnyUseOfValue(1) &&
22282 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22283 return false;
22284
22285 Addr = DAG.getSelect(SDLoc(TheSelect),
22286 LLD->getBasePtr().getValueType(),
22287 TheSelect->getOperand(0), LLD->getBasePtr(),
22288 RLD->getBasePtr());
22289 } else { // Otherwise SELECT_CC
22290 // We cannot do this optimization if any pair of {RLD, LLD} is a
22291 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22292 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22293 // one of the loads. We can further avoid this if there's no use of their
22294 // chain value.
22295
22296 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22297 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22298 Worklist.push_back(CondLHS);
22299 Worklist.push_back(CondRHS);
22300
22301 if ((LLD->hasAnyUseOfValue(1) &&
22302 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22303 (RLD->hasAnyUseOfValue(1) &&
22304 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22305 return false;
22306
22307 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22308 LLD->getBasePtr().getValueType(),
22309 TheSelect->getOperand(0),
22310 TheSelect->getOperand(1),
22311 LLD->getBasePtr(), RLD->getBasePtr(),
22312 TheSelect->getOperand(4));
22313 }
22314
22315 SDValue Load;
22316 // It is safe to replace the two loads if they have different alignments,
22317 // but the new load must be the minimum (most restrictive) alignment of the
22318 // inputs.
22319 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22320 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22321 if (!RLD->isInvariant())
22322 MMOFlags &= ~MachineMemOperand::MOInvariant;
22323 if (!RLD->isDereferenceable())
22324 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22325 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22326 // FIXME: Discards pointer and AA info.
22327 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22328 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22329 MMOFlags);
22330 } else {
22331 // FIXME: Discards pointer and AA info.
22332 Load = DAG.getExtLoad(
22333 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22334 : LLD->getExtensionType(),
22335 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22336 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22337 }
22338
22339 // Users of the select now use the result of the load.
22340 CombineTo(TheSelect, Load);
22341
22342 // Users of the old loads now use the new load's chain. We know the
22343 // old-load value is dead now.
22344 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22345 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22346 return true;
22347 }
22348
22349 return false;
22350}
22351
22352/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22353/// bitwise 'and'.
22354SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22355 SDValue N1, SDValue N2, SDValue N3,
22356 ISD::CondCode CC) {
22357 // If this is a select where the false operand is zero and the compare is a
22358 // check of the sign bit, see if we can perform the "gzip trick":
22359 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22360 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22361 EVT XType = N0.getValueType();
22362 EVT AType = N2.getValueType();
22363 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22364 return SDValue();
22365
22366 // If the comparison is testing for a positive value, we have to invert
22367 // the sign bit mask, so only do that transform if the target has a bitwise
22368 // 'and not' instruction (the invert is free).
22369 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22370 // (X > -1) ? A : 0
22371 // (X > 0) ? X : 0 <-- This is canonical signed max.
22372 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22373 return SDValue();
22374 } else if (CC == ISD::SETLT) {
22375 // (X < 0) ? A : 0
22376 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22377 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22378 return SDValue();
22379 } else {
22380 return SDValue();
22381 }
22382
22383 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22384 // constant.
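// Illustrative numbers: for i32 X and A = 4 (bit 2 set), ShCt below is
// 32 - log2(4) - 1 = 29, so (X srl 29) & 4 yields 4 exactly when X is negative
// and 0 otherwise, matching select_cc setlt X, 0, 4, 0.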
22385 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22386 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22387 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22388 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22389 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22390 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22391 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22392 AddToWorklist(Shift.getNode());
22393
22394 if (XType.bitsGT(AType)) {
22395 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22396 AddToWorklist(Shift.getNode());
22397 }
22398
22399 if (CC == ISD::SETGT)
22400 Shift = DAG.getNOT(DL, Shift, AType);
22401
22402 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22403 }
22404 }
22405
22406 unsigned ShCt = XType.getSizeInBits() - 1;
22407 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22408 return SDValue();
22409
22410 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22411 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22412 AddToWorklist(Shift.getNode());
22413
22414 if (XType.bitsGT(AType)) {
22415 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22416 AddToWorklist(Shift.getNode());
22417 }
22418
22419 if (CC == ISD::SETGT)
22420 Shift = DAG.getNOT(DL, Shift, AType);
22421
22422 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22423}
22424
22425// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22426SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22427 SDValue N0 = N->getOperand(0);
22428 SDValue N1 = N->getOperand(1);
22429 SDValue N2 = N->getOperand(2);
22430 EVT VT = N->getValueType(0);
22431 SDLoc DL(N);
22432
22433 unsigned BinOpc = N1.getOpcode();
22434 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22435 return SDValue();
22436
22437 if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
22438 return SDValue();
22439
22440 // Fold select(cond, binop(x, y), binop(z, y))
22441 // --> binop(select(cond, x, z), y)
22442 if (N1.getOperand(1) == N2.getOperand(1)) {
22443 SDValue NewSel =
22444 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22445 SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
22446 NewBinOp->setFlags(N1->getFlags());
22447 NewBinOp->intersectFlagsWith(N2->getFlags());
22448 return NewBinOp;
22449 }
22450
22451 // Fold select(cond, binop(x, y), binop(x, z))
22452 // --> binop(x, select(cond, y, z))
22453 // Second op VT might be different (e.g. shift amount type)
22454 if (N1.getOperand(0) == N2.getOperand(0) &&
22455 VT == N1.getOperand(1).getValueType() &&
22456 VT == N2.getOperand(1).getValueType()) {
22457 SDValue NewSel =
22458 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22459 SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
22460 NewBinOp->setFlags(N1->getFlags());
22461 NewBinOp->intersectFlagsWith(N2->getFlags());
22462 return NewBinOp;
22463 }
22464
22465 // TODO: Handle isCommutativeBinOp patterns as well?
22466 return SDValue();
22467}
22468
22469// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22470SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22471 SDValue N0 = N->getOperand(0);
22472 EVT VT = N->getValueType(0);
22473 bool IsFabs = N->getOpcode() == ISD::FABS;
22474 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22475
22476 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22477 return SDValue();
22478
22479 SDValue Int = N0.getOperand(0);
22480 EVT IntVT = Int.getValueType();
22481
22482 // The operand to cast should be integer.
22483 if (!IntVT.isInteger() || IntVT.isVector())
22484 return SDValue();
22485
22486 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22487 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
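// For example, with a 32-bit value: fneg is xor with 0x80000000 and fabs is
// and with 0x7FFFFFFF.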
22488 APInt SignMask;
22489 if (N0.getValueType().isVector()) {
22490 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22491 // 0x7f...) per element and splat it.
22492 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22493 if (IsFabs)
22494 SignMask = ~SignMask;
22495 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22496 } else {
22497 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22498 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22499 if (IsFabs)
22500 SignMask = ~SignMask;
22501 }
22502 SDLoc DL(N0);
22503 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22504 DAG.getConstant(SignMask, DL, IntVT));
22505 AddToWorklist(Int.getNode());
22506 return DAG.getBitcast(VT, Int);
22507}
22508
22509 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
22510/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22511/// in it. This may be a win when the constant is not otherwise available
22512/// because it replaces two constant pool loads with one.
22513SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22514 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22515 ISD::CondCode CC) {
22516 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22517 return SDValue();
22518
22519 // If we are before legalize types, we want the other legalization to happen
22520 // first (for example, to avoid messing with soft float).
22521 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22522 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22523 EVT VT = N2.getValueType();
22524 if (!TV || !FV || !TLI.isTypeLegal(VT))
22525 return SDValue();
22526
22527 // If a constant can be materialized without loads, this does not make sense.
22528 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22529 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22530 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22531 return SDValue();
22532
22533 // If both constants have multiple uses, then we won't need to do an extra
22534 // load. The values are likely around in registers for other users.
22535 if (!TV->hasOneUse() && !FV->hasOneUse())
22536 return SDValue();
22537
22538 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22539 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22540 Type *FPTy = Elts[0]->getType();
22541 const DataLayout &TD = DAG.getDataLayout();
22542
22543 // Create a ConstantArray of the two constants.
22544 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22545 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22546 TD.getPrefTypeAlign(FPTy));
22547 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22548
22549 // Get offsets to the 0 and 1 elements of the array, so we can select between
22550 // them.
22551 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22552 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22553 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22554 SDValue Cond =
22555 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22556 AddToWorklist(Cond.getNode());
22557 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22558 AddToWorklist(CstOffset.getNode());
22559 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22560 AddToWorklist(CPIdx.getNode());
22561 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22562 MachinePointerInfo::getConstantPool(
22563 DAG.getMachineFunction()), Alignment);
22564}
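// Illustrative sketch of the net effect, assuming an f32 target where FP
// immediates must be loaded from the constant pool (element size 4 bytes):
//   before: r = select (setcc a, b, cc), 1.0f, 2.0f     ; two constant loads
//   after:  tmp = constant-pool array { 2.0f, 1.0f }    ; one pool entry
//           off = select (setcc a, b, cc), 4, 0         ; byte offset
//           r   = load (tmp + off)                      ; one constant load
// Element 0 holds the false value and element 1 the true value, matching the
// Elts[] initialization above.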
22565
22566/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22567/// where 'cond' is the comparison specified by CC.
22568SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22569 SDValue N2, SDValue N3, ISD::CondCode CC,
22570 bool NotExtCompare) {
22571 // (x ? y : y) -> y.
22572 if (N2 == N3) return N2;
22573
22574 EVT CmpOpVT = N0.getValueType();
22575 EVT CmpResVT = getSetCCResultType(CmpOpVT);
22576 EVT VT = N2.getValueType();
22577 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22578 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22579 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22580
22581 // Determine if the condition we're dealing with is constant.
22582 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22583 AddToWorklist(SCC.getNode());
22584 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22585 // fold select_cc true, x, y -> x
22586 // fold select_cc false, x, y -> y
22587 return !(SCCC->isNullValue()) ? N2 : N3;
22588 }
22589 }
22590
22591 if (SDValue V =
22592 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22593 return V;
22594
22595 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22596 return V;
22597
22598 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22599 // where y has a single bit set.
22600 // In plain terms: we can turn the SELECT_CC into an AND when the condition
22601 // can be materialized as an all-ones register. Any single bit-test can be
22602 // materialized as an all-ones register with a shift-left followed by an
22603 // arithmetic shift-right (see the worked example after this block).
22604 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22605 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22606 SDValue AndLHS = N0->getOperand(0);
22607 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22608 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22609 // Shift the tested bit over the sign bit.
22610 const APInt &AndMask = ConstAndRHS->getAPIntValue();
22611 unsigned ShCt = AndMask.getBitWidth() - 1;
22612 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22613 SDValue ShlAmt =
22614 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22615 getShiftAmountTy(AndLHS.getValueType()));
22616 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22617
22618 // Now arithmetic right shift it all the way over, so the result is
22619 // either all-ones, or zero.
22620 SDValue ShrAmt =
22621 DAG.getConstant(ShCt, SDLoc(Shl),
22622 getShiftAmountTy(Shl.getValueType()));
22623 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22624
22625 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22626 }
22627 }
22628 }
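// Worked example of the bit-test fold above, with hypothetical i32 values:
//   select_cc seteq (and x, 8), 0, 0, A
// The mask 0x8 has a single bit set (bit 3) and countLeadingZeros(8) == 28, so:
//   t1 = shl x, 28       ; move bit 3 into the sign bit
//   t2 = sra t1, 31      ; all-ones if the bit was set, zero otherwise
//   r  = and t2, A       ; A when (x & 8) != 0, and 0 when it is zero
// which is exactly what the select_cc computes.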
22629
22630 // fold select C, 16, 0 -> shl C, 4
22631 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22632 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22633
22634 if ((Fold || Swap) &&
22635 TLI.getBooleanContents(CmpOpVT) ==
22636 TargetLowering::ZeroOrOneBooleanContent &&
22637 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22638
22639 if (Swap) {
22640 CC = ISD::getSetCCInverse(CC, CmpOpVT);
22641 std::swap(N2C, N3C);
22642 }
22643
22644 // If the caller doesn't want us to simplify this into a zext of a compare,
22645 // don't do it.
22646 if (NotExtCompare && N2C->isOne())
22647 return SDValue();
22648
22649 SDValue Temp, SCC;
22650 // zext (setcc n0, n1)
22651 if (LegalTypes) {
22652 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22653 if (VT.bitsLT(SCC.getValueType()))
22654 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22655 else
22656 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22657 } else {
22658 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22659 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22660 }
22661
22662 AddToWorklist(SCC.getNode());
22663 AddToWorklist(Temp.getNode());
22664
22665 if (N2C->isOne())
22666 return Temp;
22667
22668 unsigned ShCt = N2C->getAPIntValue().logBase2();
22669 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22670 return SDValue();
22671
22672 // shl setcc result by log2 n2c
22673 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22674 DAG.getConstant(ShCt, SDLoc(Temp),
22675 getShiftAmountTy(Temp.getValueType())));
22676 }
22677
22678 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22679 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22680 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22681 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22682 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22683 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22684 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22685 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22686 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22687 SDValue ValueOnZero = N2;
22688 SDValue Count = N3;
22689 // If the condition is NE instead of EQ, swap the operands.
22690 if (CC == ISD::SETNE)
22691 std::swap(ValueOnZero, Count);
22692 // Check if the value on zero is a constant equal to the bits in the type.
22693 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22694 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22695 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22696 // legal, combine to just cttz.
22697 if ((Count.getOpcode() == ISD::CTTZ ||
22698 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22699 N0 == Count.getOperand(0) &&
22700 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22701 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22702 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22703 // legal, combine to just ctlz.
22704 if ((Count.getOpcode() == ISD::CTLZ ||
22705 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22706 N0 == Count.getOperand(0) &&
22707 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22708 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22709 }
22710 }
22711 }
22712
22713 return SDValue();
22714}
22715
22716/// This is a stub for TargetLowering::SimplifySetCC.
22717SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22718 ISD::CondCode Cond, const SDLoc &DL,
22719 bool foldBooleans) {
22720 TargetLowering::DAGCombinerInfo
22721 DagCombineInfo(DAG, Level, false, this);
22722 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22723}
22724
22725/// Given an ISD::SDIV node expressing a divide by constant, return
22726/// a DAG expression to select that will generate the same value by multiplying
22727/// by a magic number.
22728/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22729SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22730 // When optimising for minimum size, we don't want to expand a div to a mul
22731 // and a shift.
22732 if (DAG.getMachineFunction().getFunction().hasMinSize())
22733 return SDValue();
22734
22735 SmallVector<SDNode *, 8> Built;
22736 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22737 for (SDNode *N : Built)
22738 AddToWorklist(N);
22739 return S;
22740 }
22741
22742 return SDValue();
22743}
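// One classic instance of the magic-number technique referenced above
// (Hacker's Delight, ch. 10), shown for a signed i32 divide by 3. The
// constants are illustrative; TLI.BuildSDIV computes them per divisor:
//   q = mulhs(x, 0x55555556)   ; high 32 bits of the 64-bit signed product
//   q = q + (srl q, 31)        ; add 1 when the quotient is negative
// which yields x / 3 truncated toward zero without an actual sdiv.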
22744
22745/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22746/// DAG expression that will generate the same value by right shifting.
22747SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22748 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22749 if (!C)
22750 return SDValue();
22751
22752 // Avoid division by zero.
22753 if (C->isNullValue())
22754 return SDValue();
22755
22756 SmallVector<SDNode *, 8> Built;
22757 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22758 for (SDNode *N : Built)
22759 AddToWorklist(N);
22760 return S;
22761 }
22762
22763 return SDValue();
22764}
22765
22766/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22767/// expression that will generate the same value by multiplying by a magic
22768/// number.
22769/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22770SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22771 // When optimising for minimum size, we don't want to expand a div to a mul
22772 // and a shift.
22773 if (DAG.getMachineFunction().getFunction().hasMinSize())
22774 return SDValue();
22775
22776 SmallVector<SDNode *, 8> Built;
22777 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22778 for (SDNode *N : Built)
22779 AddToWorklist(N);
22780 return S;
22781 }
22782
22783 return SDValue();
22784}
22785
22786/// Determines the LogBase2 value for a non-null input value using the
22787/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22788SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22789 EVT VT = V.getValueType();
22790 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22791 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22792 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22793 return LogBase2;
22794}
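// Quick numeric check of the transform above for a 32-bit element:
//   V = 16  =>  ctlz(16) = 27, and (32 - 1) - 27 = 4 = log2(16)
// For a non-power-of-two input this computes floor(log2(V)); the input must
// be non-null, as the doc comment above requires.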
22795
22796/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22797/// For the reciprocal, we need to find the zero of the function:
22798/// F(X) = A X - 1 [which has a zero at X = 1/A]
22799/// =>
22800/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22801/// does not require additional intermediate precision]
22802/// For the last iteration, put numerator N into it to gain more precision:
22803/// Result = N X_i + X_i (N - N A X_i)
22804SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22805 SDNodeFlags Flags) {
22806 if (LegalDAG)
22807 return SDValue();
22808
22809 // TODO: Handle half and/or extended types?
22810 EVT VT = Op.getValueType();
22811 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22812 return SDValue();
22813
22814 // If estimates are explicitly disabled for this function, we're done.
22815 MachineFunction &MF = DAG.getMachineFunction();
22816 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22817 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22818 return SDValue();
22819
22820 // Estimates may be explicitly enabled for this type with a custom number of
22821 // refinement steps.
22822 int Iterations = TLI.getDivRefinementSteps(VT, MF);
22823 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22824 AddToWorklist(Est.getNode());
22825
22826 SDLoc DL(Op);
22827 if (Iterations) {
22828 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22829
22830 // Newton iterations: Est = Est + Est (N - Arg * Est)
22831 // If this is the last iteration, also multiply by the numerator.
22832 for (int i = 0; i < Iterations; ++i) {
22833 SDValue MulEst = Est;
22834
22835 if (i == Iterations - 1) {
22836 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22837 AddToWorklist(MulEst.getNode());
22838 }
22839
22840 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22841 AddToWorklist(NewEst.getNode());
22842
22843 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22844 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22845 AddToWorklist(NewEst.getNode());
22846
22847 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22848 AddToWorklist(NewEst.getNode());
22849
22850 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22851 AddToWorklist(Est.getNode());
22852 }
22853 } else {
22854 // If no iterations are available, multiply with N.
22855 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22856 AddToWorklist(Est.getNode());
22857 }
22858
22859 return Est;
22860 }
22861
22862 return SDValue();
22863}
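// Scalar model of one refinement step built above, with illustrative numbers:
// for A = 3.0 and an initial estimate X0 = 0.3,
//   X1 = X0 + X0*(1 - A*X0) = 0.3 + 0.3*(1 - 0.9) = 0.33
// versus the exact reciprocal 0.3333..., so each step roughly doubles the
// number of correct digits (quadratic convergence). On the final step the
// numerator N is folded in, yielding an approximation of N/Op directly, as
// the comment above describes.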
22864
22865/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22866/// For the reciprocal sqrt, we need to find the zero of the function:
22867/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22868/// =>
22869/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22870/// As a result, we precompute A/2 prior to the iteration loop.
22871SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22872 unsigned Iterations,
22873 SDNodeFlags Flags, bool Reciprocal) {
22874 EVT VT = Arg.getValueType();
22875 SDLoc DL(Arg);
22876 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22877
22878 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22879 // this entire sequence requires only one FP constant.
22880 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22881 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22882
22883 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22884 for (unsigned i = 0; i < Iterations; ++i) {
22885 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22886 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22887 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22888 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22889 }
22890
22891 // If non-reciprocal square root is requested, multiply the result by Arg.
22892 if (!Reciprocal)
22893 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22894
22895 return Est;
22896}
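// Numeric check of one iteration above, with illustrative values: A = 4.0,
// so 1/sqrt(A) = 0.5 and HalfArg = 1.5*4 - 4 = 2.0. Starting from Est = 0.6:
//   Est' = 0.6 * (1.5 - 2.0 * 0.6 * 0.6) = 0.6 * 0.78 = 0.468
// which is closer to 0.5 than the starting estimate, as expected for a
// Newton-Raphson step.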
22897
22898/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22899/// For the reciprocal sqrt, we need to find the zero of the function:
22900/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22901/// =>
22902/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
22903SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22904 unsigned Iterations,
22905 SDNodeFlags Flags, bool Reciprocal) {
22906 EVT VT = Arg.getValueType();
22907 SDLoc DL(Arg);
22908 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22909 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22910
22911 // This routine must enter the loop below to work correctly
22912 // when (Reciprocal == false).
22913 assert(Iterations > 0);
22914
22915 // Newton iterations for reciprocal square root:
22916 // E = (E * -0.5) * ((A * E) * E + -3.0)
22917 for (unsigned i = 0; i < Iterations; ++i) {
22918 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22919 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22920 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22921
22922 // When calculating a square root at the last iteration build:
22923 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22924 // (notice a common subexpression)
22925 SDValue LHS;
22926 if (Reciprocal || (i + 1) < Iterations) {
22927 // RSQRT: LHS = (E * -0.5)
22928 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22929 } else {
22930 // SQRT: LHS = (A * E) * -0.5
22931 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22932 }
22933
22934 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22935 }
22936
22937 return Est;
22938}
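// The two-constant form above performs the same step algebraically:
//   (-0.5*E) * (A*E*E - 3.0) = E * (1.5 - 0.5*A*E*E)
// With the same illustrative values A = 4.0 and E = 0.6:
//   (-0.3) * (4*0.36 - 3.0) = (-0.3) * (-1.56) = 0.468
// matching the one-constant result, but expressed with the constants -0.5 and
// -3.0 and a shape that exposes the (A * E) common subexpression for sqrt.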
22939
22940/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22941/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22942/// Op can be zero.
22943SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22944 bool Reciprocal) {
22945 if (LegalDAG)
22946 return SDValue();
22947
22948 // TODO: Handle half and/or extended types?
22949 EVT VT = Op.getValueType();
22950 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22951 return SDValue();
22952
22953 // If estimates are explicitly disabled for this function, we're done.
22954 MachineFunction &MF = DAG.getMachineFunction();
22955 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22956 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22957 return SDValue();
22958
22959 // Estimates may be explicitly enabled for this type with a custom number of
22960 // refinement steps.
22961 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22962
22963 bool UseOneConstNR = false;
22964 if (SDValue Est =
22965 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22966 Reciprocal)) {
22967 AddToWorklist(Est.getNode());
22968
22969 if (Iterations)
22970 Est = UseOneConstNR
22971 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22972 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22973 if (!Reciprocal) {
22974 SDLoc DL(Op);
22975 // Try the target specific test first.
22976 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
22977
22978 // The estimate is now completely wrong if the input was exactly 0.0 or
22979 // possibly a denormal. Force the answer to 0.0 or value provided by
22980 // target for those cases.
22981 Est = DAG.getNode(
22982 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22983 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22984 }
22985 return Est;
22986 }
22987
22988 return SDValue();
22989}
22990
22991SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22992 return buildSqrtEstimateImpl(Op, Flags, true);
22993}
22994
22995SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22996 return buildSqrtEstimateImpl(Op, Flags, false);
22997}
22998
22999/// Return true if there is any possibility that the two addresses overlap.
23000bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
23001
23002 struct MemUseCharacteristics {
23003 bool IsVolatile;
23004 bool IsAtomic;
23005 SDValue BasePtr;
23006 int64_t Offset;
23007 Optional<int64_t> NumBytes;
23008 MachineMemOperand *MMO;
23009 };
23010
23011 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23012 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23013 int64_t Offset = 0;
23014 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23015 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23016 ? C->getSExtValue()
23017 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23018 ? -1 * C->getSExtValue()
23019 : 0;
23020 uint64_t Size =
23021 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23022 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23023 Offset /*base offset*/,
23024 Optional<int64_t>(Size),
23025 LSN->getMemOperand()};
23026 }
23027 if (const auto *LN = cast<LifetimeSDNode>(N))
23028 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23029 (LN->hasOffset()) ? LN->getOffset() : 0,
23030 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23031 : Optional<int64_t>(),
23032 (MachineMemOperand *)nullptr};
23033 // Default.
23034 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23035 (int64_t)0 /*offset*/,
23036 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23037 };
23038
23039 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23040 MUC1 = getCharacteristics(Op1);
23041
23042 // If they are to the same address, then they must be aliases.
23043 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23044 MUC0.Offset == MUC1.Offset)
23045 return true;
23046
23047 // If they are both volatile then they cannot be reordered.
23048 if (MUC0.IsVolatile && MUC1.IsVolatile)
23049 return true;
23050
23051 // Be conservative about atomics for the moment
23052 // TODO: This is way overconservative for unordered atomics (see D66309)
23053 if (MUC0.IsAtomic && MUC1.IsAtomic)
23054 return true;
23055
23056 if (MUC0.MMO && MUC1.MMO) {
23057 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23058 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23059 return false;
23060 }
23061
23062 // Try to prove that there is aliasing, or that there is no aliasing. Either
23063 // way, we can return now. If nothing can be proved, proceed with more tests.
23064 bool IsAlias;
23065 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23066 DAG, IsAlias))
23067 return IsAlias;
23068
23069 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23070 // either is not known.
23071 if (!MUC0.MMO || !MUC1.MMO)
23072 return true;
23073
23074 // If one operation reads from invariant memory, and the other may store, they
23075 // cannot alias. These should really be checking the equivalent of mayWrite,
23076 // but it only matters for memory nodes other than load/store.
23077 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23078 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23079 return false;
23080
23081 // If we know required SrcValue1 and SrcValue2 have relatively large
23082 // alignment compared to the size and offset of the access, we may be able
23083 // to prove they do not alias. This check is conservative for now to catch
23084 // cases created by splitting vector types, it only works when the offsets are
23085 // multiples of the size of the data.
23086 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23087 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23088 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23089 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23090 auto &Size0 = MUC0.NumBytes;
23091 auto &Size1 = MUC1.NumBytes;
23092 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23093 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23094 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23095 SrcValOffset1 % *Size1 == 0) {
23096 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23097 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23098
23099 // There is no overlap between these relatively aligned accesses of
23100 // similar size. Return no alias.
23101 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23102 return false;
23103 }
23104
23105 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23106 ? CombinerGlobalAA
23107 : DAG.getSubtarget().useAA();
23108#ifndef NDEBUG
23109 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23110 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23111 UseAA = false;
23112#endif
23113
23114 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23115 Size0.hasValue() && Size1.hasValue()) {
23116 // Use alias analysis information.
23117 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23118 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23119 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23120 if (AA->isNoAlias(
23121 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23122 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23123 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23124 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23125 return false;
23126 }
23127
23128 // Otherwise we have to assume they alias.
23129 return true;
23130}
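// Illustrative instance of the alignment-based disambiguation above, with
// hypothetical numbers: two simple 4-byte accesses whose memory operands share
// a 16-byte base alignment, at source-value offsets 4 and 8.
//   OffAlign0 = 4 % 16 = 4,  OffAlign1 = 8 % 16 = 8
//   OffAlign0 + 4 <= OffAlign1, so the accesses cannot overlap -> no alias.
// When neither this check nor (optional) alias analysis proves anything, the
// function conservatively reports a possible alias.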
23131
23132/// Walk up chain skipping non-aliasing memory nodes,
23133/// looking for aliasing nodes and adding them to the Aliases vector.
23134void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23135 SmallVectorImpl<SDValue> &Aliases) {
23136 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23137 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23138
23139 // Get alias information for node.
23140 // TODO: relax aliasing for unordered atomics (see D66309)
23141 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23142
23143 // Starting off.
23144 Chains.push_back(OriginalChain);
23145 unsigned Depth = 0;
23146
23147 // Attempt to improve chain by a single step
23148 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23149 switch (C.getOpcode()) {
23150 case ISD::EntryToken:
23151 // No need to mark EntryToken.
23152 C = SDValue();
23153 return true;
23154 case ISD::LOAD:
23155 case ISD::STORE: {
23156 // Get alias information for C.
23157 // TODO: Relax aliasing for unordered atomics (see D66309)
23158 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23159 cast<LSBaseSDNode>(C.getNode())->isSimple();
23160 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
23161 // Look further up the chain.
23162 C = C.getOperand(0);
23163 return true;
23164 }
23165 // Alias, so stop here.
23166 return false;
23167 }
23168
23169 case ISD::CopyFromReg:
23170 // Always forward past CopyFromReg.
23171 C = C.getOperand(0);
23172 return true;
23173
23174 case ISD::LIFETIME_START:
23175 case ISD::LIFETIME_END: {
23176 // We can forward past any lifetime start/end that can be proven not to
23177 // alias the memory access.
23178 if (!isAlias(N, C.getNode())) {
23179 // Look further up the chain.
23180 C = C.getOperand(0);
23181 return true;
23182 }
23183 return false;
23184 }
23185 default:
23186 return false;
23187 }
23188 };
23189
23190 // Look at each chain and determine if it is an alias. If so, add it to the
23191 // aliases list. If not, then continue up the chain looking for the next
23192 // candidate.
23193 while (!Chains.empty()) {
23194 SDValue Chain = Chains.pop_back_val();
23195
23196 // Don't bother if we've seen Chain before.
23197 if (!Visited.insert(Chain.getNode()).second)
23198 continue;
23199
23200 // For TokenFactor nodes, look at each operand and only continue up the
23201 // chain until we reach the depth limit.
23202 //
23203 // FIXME: The depth check could be made to return the last non-aliasing
23204 // chain we found before we hit a tokenfactor rather than the original
23205 // chain.
23206 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23207 Aliases.clear();
23208 Aliases.push_back(OriginalChain);
23209 return;
23210 }
23211
23212 if (Chain.getOpcode() == ISD::TokenFactor) {
23213 // We have to check each of the operands of the token factor for "small"
23214 // token factors, so we queue them up. Adding the operands to the queue
23215 // (stack) in reverse order maintains the original order and increases the
23216 // likelihood that getNode will find a matching token factor (CSE).
23217 if (Chain.getNumOperands() > 16) {
23218 Aliases.push_back(Chain);
23219 continue;
23220 }
23221 for (unsigned n = Chain.getNumOperands(); n;)
23222 Chains.push_back(Chain.getOperand(--n));
23223 ++Depth;
23224 continue;
23225 }
23226 // Everything else
23227 if (ImproveChain(Chain)) {
23228 // Updated Chain Found, Consider new chain if one exists.
23229 if (Chain.getNode())
23230 Chains.push_back(Chain);
23231 ++Depth;
23232 continue;
23233 }
23234 // No Improved Chain Possible, treat as Alias.
23235 Aliases.push_back(Chain);
23236 }
23237}
23238
23239/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
23240/// (aliasing node).
23241SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23242 if (OptLevel == CodeGenOpt::None)
23243 return OldChain;
23244
23245 // Ops for replacing token factor.
23246 SmallVector<SDValue, 8> Aliases;
23247
23248 // Accumulate all the aliases to this node.
23249 GatherAllAliases(N, OldChain, Aliases);
23250
23251 // If no operands then chain to entry token.
23252 if (Aliases.size() == 0)
23253 return DAG.getEntryNode();
23254
23255 // If a single operand then chain to it. We don't need to revisit it.
23256 if (Aliases.size() == 1)
23257 return Aliases[0];
23258
23259 // Construct a custom tailored token factor.
23260 return DAG.getTokenFactor(SDLoc(N), Aliases);
23261}
23262
23263namespace {
23264// TODO: Replace with std::monostate when we move to C++17.
23265struct UnitT { } Unit;
23266bool operator==(const UnitT &, const UnitT &) { return true; }
23267bool operator!=(const UnitT &, const UnitT &) { return false; }
23268} // namespace
23269
23270// This function tries to collect a bunch of potentially interesting
23271// nodes to improve the chains of, all at once. This might seem
23272// redundant, as this function gets called when visiting every store
23273// node, so why not let the work be done on each store as it's visited?
23274//
23275// I believe this is mainly important because mergeConsecutiveStores
23276// is unable to deal with merging stores of different sizes, so unless
23277// we improve the chains of all the potential candidates up-front
23278// before running mergeConsecutiveStores, it might only see some of
23279// the nodes that will eventually be candidates, and then not be able
23280// to go from a partially-merged state to the desired final
23281// fully-merged state.
23282
23283bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23284 SmallVector<StoreSDNode *, 8> ChainedStores;
23285 StoreSDNode *STChain = St;
23286 // Intervals records which offsets from BaseIndex have been covered. In
23287 // the common case, every store writes to the immediately preceding address
23288 // and is thus merged with the previous interval at insertion time.
23289
23290 using IMap =
23291 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23292 IMap::Allocator A;
23293 IMap Intervals(A);
23294
23295 // This holds the base pointer, index, and the offset in bytes from the base
23296 // pointer.
23297 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23298
23299 // We must have a base and an offset.
23300 if (!BasePtr.getBase().getNode())
23301 return false;
23302
23303 // Do not handle stores to undef base pointers.
23304 if (BasePtr.getBase().isUndef())
23305 return false;
23306
23307 // Do not handle stores to opaque types
23308 if (St->getMemoryVT().isZeroSized())
23309 return false;
23310
23311 // BaseIndexOffset assumes that offsets are fixed-size, which
23312 // is not valid for scalable vectors where the offsets are
23313 // scaled by `vscale`, so bail out early.
23314 if (St->getMemoryVT().isScalableVector())
23315 return false;
23316
23317 // Add ST's interval.
23318 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23319
23320 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23321 if (Chain->getMemoryVT().isScalableVector())
23322 return false;
23323
23324 // If the chain has more than one use, then we can't reorder the mem ops.
23325 if (!SDValue(Chain, 0)->hasOneUse())
23326 break;
23327 // TODO: Relax for unordered atomics (see D66309)
23328 if (!Chain->isSimple() || Chain->isIndexed())
23329 break;
23330
23331 // Find the base pointer and offset for this memory node.
23332 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23333 // Check that the base pointer is the same as the original one.
23334 int64_t Offset;
23335 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23336 break;
23337 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23338 // Make sure we don't overlap with other intervals by checking the ones to
23339 // the left or right before inserting.
23340 auto I = Intervals.find(Offset);
23341 // If there's a next interval, we should end before it.
23342 if (I != Intervals.end() && I.start() < (Offset + Length))
23343 break;
23344 // If there's a previous interval, we should start after it.
23345 if (I != Intervals.begin() && (--I).stop() <= Offset)
23346 break;
23347 Intervals.insert(Offset, Offset + Length, Unit);
23348
23349 ChainedStores.push_back(Chain);
23350 STChain = Chain;
23351 }
23352
23353 // If we didn't find a chained store, exit.
23354 if (ChainedStores.size() == 0)
23355 return false;
23356
23357 // Improve all chained stores (St and ChainedStores members) starting from
23358 // where the store chain ended and return single TokenFactor.
23359 SDValue NewChain = STChain->getChain();
23360 SmallVector<SDValue, 8> TFOps;
23361 for (unsigned I = ChainedStores.size(); I;) {
23362 StoreSDNode *S = ChainedStores[--I];
23363 SDValue BetterChain = FindBetterChain(S, NewChain);
23364 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23365 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23366 TFOps.push_back(SDValue(S, 0));
23367 ChainedStores[I] = S;
23368 }
23369
23370 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23371 SDValue BetterChain = FindBetterChain(St, NewChain);
23372 SDValue NewST;
23373 if (St->isTruncatingStore())
23374 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23375 St->getBasePtr(), St->getMemoryVT(),
23376 St->getMemOperand());
23377 else
23378 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23379 St->getBasePtr(), St->getMemOperand());
23380
23381 TFOps.push_back(NewST);
23382
23383 // If we improved every element of TFOps, then we've lost the dependence on
23384 // NewChain to successors of St and we need to add it back to TFOps. Do so at
23385 // the beginning to keep relative order consistent with FindBetterChains.
23386 auto hasImprovedChain = [&](SDValue ST) -> bool {
23387 return ST->getOperand(0) != NewChain;
23388 };
23389 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23390 if (AddNewChain)
23391 TFOps.insert(TFOps.begin(), NewChain);
23392
23393 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23394 CombineTo(St, TF);
23395
23396 // Add TF and its operands to the worklist.
23397 AddToWorklist(TF.getNode());
23398 for (const SDValue &Op : TF->ops())
23399 AddToWorklist(Op.getNode());
23400 AddToWorklist(STChain);
23401 return true;
23402}
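// Illustrative walk-through with hypothetical offsets: a chain of three
// 4-byte stores to Base+0, Base+4 and Base+8 yields the disjoint intervals
// [0,4), [4,8) and [8,12), so none of the overlap checks above break out of
// the loop. Each store is then re-chained via FindBetterChain and the results
// are joined by a single TokenFactor, letting the stores issue independently
// rather than serially through one another's chains.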
23403
23404bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23405 if (OptLevel == CodeGenOpt::None)
23406 return false;
23407
23408 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23409
23410 // We must have a base and an offset.
23411 if (!BasePtr.getBase().getNode())
23412 return false;
23413
23414 // Do not handle stores to undef base pointers.
23415 if (BasePtr.getBase().isUndef())
23416 return false;
23417
23418 // Directly improve a chain of disjoint stores starting at St.
23419 if (parallelizeChainedStores(St))
23420 return true;
23421
23422 // Improve St's Chain..
23423 SDValue BetterChain = FindBetterChain(St, St->getChain());
23424 if (St->getChain() != BetterChain) {
23425 replaceStoreChain(St, BetterChain);
23426 return true;
23427 }
23428 return false;
23429}
23430
23431/// This is the entry point for the file.
23432void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23433 CodeGenOpt::Level OptLevel) {
23434 /// This is the main entry point to this class.
23435 DAGCombiner(*this, AA, OptLevel).Run(Level);
23436}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Return true if the node has at least one operand and all operands of the
122/// specified node are ISD::UNDEF.
123bool allOperandsUndef(const SDNode *N);
124
125} // end namespace ISD
126
127//===----------------------------------------------------------------------===//
128/// Unlike LLVM values, Selection DAG nodes may return multiple
129/// values as the result of a computation. Many nodes return multiple values,
130/// from loads (which define a token and a return value) to ADDC (which returns
131/// a result and a carry value), to calls (which may return an arbitrary number
132/// of values).
133///
134/// As such, each use of a SelectionDAG computation must indicate the node that
135/// computes it as well as which return value to use from that node. This pair
136/// of information is represented with the SDValue value type.
137///
138class SDValue {
139 friend struct DenseMapInfo<SDValue>;
140
141 SDNode *Node = nullptr; // The node defining the value we are using.
142 unsigned ResNo = 0; // Which return value of the node we are using.
143
144public:
145 SDValue() = default;
146 SDValue(SDNode *node, unsigned resno);
147
148 /// get the index which selects a specific result in the SDNode
149 unsigned getResNo() const { return ResNo; }
150
151 /// get the SDNode which holds the desired result
152 SDNode *getNode() const { return Node; }
153
154 /// set the SDNode
155 void setNode(SDNode *N) { Node = N; }
156
157 inline SDNode *operator->() const { return Node; }
158
159 bool operator==(const SDValue &O) const {
160 return Node == O.Node && ResNo == O.ResNo;
161 }
162 bool operator!=(const SDValue &O) const {
163 return !operator==(O);
164 }
165 bool operator<(const SDValue &O) const {
166 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
167 }
168 explicit operator bool() const {
169 return Node != nullptr;
170 }
171
172 SDValue getValue(unsigned R) const {
173 return SDValue(Node, R);
174 }
175
176 /// Return true if this node is an operand of N.
177 bool isOperandOf(const SDNode *N) const;
178
179 /// Return the ValueType of the referenced return value.
180 inline EVT getValueType() const;
181
182 /// Return the simple ValueType of the referenced return value.
183 MVT getSimpleValueType() const {
184 return getValueType().getSimpleVT();
185 }
186
187 /// Returns the size of the value in bits.
188 ///
189 /// If the value type is a scalable vector type, the scalable property will
190 /// be set and the runtime size will be a positive integer multiple of the
191 /// base size.
192 TypeSize getValueSizeInBits() const {
193 return getValueType().getSizeInBits();
194 }
195
196 uint64_t getScalarValueSizeInBits() const {
197 return getValueType().getScalarType().getFixedSizeInBits();
198 }
199
200 // Forwarding methods - These forward to the corresponding methods in SDNode.
201 inline unsigned getOpcode() const;
202 inline unsigned getNumOperands() const;
203 inline const SDValue &getOperand(unsigned i) const;
204 inline uint64_t getConstantOperandVal(unsigned i) const;
205 inline const APInt &getConstantOperandAPInt(unsigned i) const;
206 inline bool isTargetMemoryOpcode() const;
207 inline bool isTargetOpcode() const;
208 inline bool isMachineOpcode() const;
209 inline bool isUndef() const;
210 inline unsigned getMachineOpcode() const;
211 inline const DebugLoc &getDebugLoc() const;
212 inline void dump() const;
213 inline void dump(const SelectionDAG *G) const;
214 inline void dumpr() const;
215 inline void dumpr(const SelectionDAG *G) const;
216
217 /// Return true if this operand (which must be a chain) reaches the
218 /// specified operand without crossing any side-effecting instructions.
219 /// In practice, this looks through token factors and non-volatile loads.
220 /// In order to remain efficient, this only
221 /// looks a couple of nodes in, it does not do an exhaustive search.
222 bool reachesChainWithoutSideEffects(SDValue Dest,
223 unsigned Depth = 2) const;
224
225 /// Return true if there are no nodes using value ResNo of Node.
226 inline bool use_empty() const;
227
228 /// Return true if there is exactly one node using value ResNo of Node.
229 inline bool hasOneUse() const;
230};
231
232template<> struct DenseMapInfo<SDValue> {
233 static inline SDValue getEmptyKey() {
234 SDValue V;
235 V.ResNo = -1U;
236 return V;
237 }
238
239 static inline SDValue getTombstoneKey() {
240 SDValue V;
241 V.ResNo = -2U;
242 return V;
243 }
244
245 static unsigned getHashValue(const SDValue &Val) {
246 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
247 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
248 }
249
250 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
251 return LHS == RHS;
252 }
253};
254
255/// Allow casting operators to work directly on
256/// SDValues as if they were SDNode*'s.
257template<> struct simplify_type<SDValue> {
258 using SimpleType = SDNode *;
259
260 static SimpleType getSimplifiedValue(SDValue &Val) {
261 return Val.getNode();
262 }
263};
264template<> struct simplify_type<const SDValue> {
265 using SimpleType = /*const*/ SDNode *;
266
267 static SimpleType getSimplifiedValue(const SDValue &Val) {
268 return Val.getNode();
269 }
270};
271
272/// Represents a use of a SDNode. This class holds an SDValue,
273/// which records the SDNode being used and the result number, a
274/// pointer to the SDNode using the value, and Next and Prev pointers,
275/// which link together all the uses of an SDNode.
276///
277class SDUse {
278 /// Val - The value being used.
279 SDValue Val;
280 /// User - The user of this value.
281 SDNode *User = nullptr;
282 /// Prev, Next - Pointers to the uses list of the SDNode referred by
283 /// this operand.
284 SDUse **Prev = nullptr;
285 SDUse *Next = nullptr;
286
287public:
288 SDUse() = default;
289 SDUse(const SDUse &U) = delete;
290 SDUse &operator=(const SDUse &) = delete;
291
292 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
293 operator const SDValue&() const { return Val; }
294
295 /// If implicit conversion to SDValue doesn't work, the get() method returns
296 /// the SDValue.
297 const SDValue &get() const { return Val; }
298
299 /// This returns the SDNode that contains this Use.
300 SDNode *getUser() { return User; }
301
302 /// Get the next SDUse in the use list.
303 SDUse *getNext() const { return Next; }
304
305 /// Convenience function for get().getNode().
306 SDNode *getNode() const { return Val.getNode(); }
307 /// Convenience function for get().getResNo().
308 unsigned getResNo() const { return Val.getResNo(); }
309 /// Convenience function for get().getValueType().
310 EVT getValueType() const { return Val.getValueType(); }
311
312 /// Convenience function for get().operator==
313 bool operator==(const SDValue &V) const {
314 return Val == V;
315 }
316
317 /// Convenience function for get().operator!=
318 bool operator!=(const SDValue &V) const {
319 return Val != V;
320 }
321
322 /// Convenience function for get().operator<
323 bool operator<(const SDValue &V) const {
324 return Val < V;
325 }
326
327private:
328 friend class SelectionDAG;
329 friend class SDNode;
330 // TODO: unfriend HandleSDNode once we fix its operand handling.
331 friend class HandleSDNode;
332
333 void setUser(SDNode *p) { User = p; }
334
335 /// Remove this use from its existing use list, assign it the
336 /// given value, and add it to the new value's node's use list.
337 inline void set(const SDValue &V);
338 /// Like set, but only supports initializing a newly-allocated
339 /// SDUse with a non-null value.
340 inline void setInitial(const SDValue &V);
341 /// Like set, but only sets the Node portion of the value,
342 /// leaving the ResNo portion unmodified.
343 inline void setNode(SDNode *N);
344
345 void addToList(SDUse **List) {
346 Next = *List;
347 if (Next) Next->Prev = &Next;
348 Prev = List;
349 *List = this;
350 }
351
352 void removeFromList() {
353 *Prev = Next;
354 if (Next) Next->Prev = Prev;
355 }
356};
357
358/// simplify_type specializations - Allow casting operators to work directly on
359/// SDValues as if they were SDNode*'s.
360template<> struct simplify_type<SDUse> {
361 using SimpleType = SDNode *;
362
363 static SimpleType getSimplifiedValue(SDUse &Val) {
364 return Val.getNode();
365 }
366};
367
368/// These are IR-level optimization flags that may be propagated to SDNodes.
369/// TODO: This data structure should be shared by the IR optimizer and
370/// the backend.
371struct SDNodeFlags {
372private:
373 bool NoUnsignedWrap : 1;
374 bool NoSignedWrap : 1;
375 bool Exact : 1;
376 bool NoNaNs : 1;
377 bool NoInfs : 1;
378 bool NoSignedZeros : 1;
379 bool AllowReciprocal : 1;
380 bool AllowContract : 1;
381 bool ApproximateFuncs : 1;
382 bool AllowReassociation : 1;
383
384 // We assume instructions do not raise floating-point exceptions by default,
385 // and only those marked explicitly may do so. We could choose to represent
386// this via a positive "FPExcept" flag like on the MI level, but having a
387 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
388 // intersection logic more straightforward.
389 bool NoFPExcept : 1;
390
391public:
392 /// Default constructor turns off all optimization flags.
393 SDNodeFlags()
394 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
395 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
396 AllowContract(false), ApproximateFuncs(false),
397 AllowReassociation(false), NoFPExcept(false) {}
398
399 /// Propagate the fast-math-flags from an IR FPMathOperator.
400 void copyFMF(const FPMathOperator &FPMO) {
401 setNoNaNs(FPMO.hasNoNaNs());
402 setNoInfs(FPMO.hasNoInfs());
403 setNoSignedZeros(FPMO.hasNoSignedZeros());
404 setAllowReciprocal(FPMO.hasAllowReciprocal());
405 setAllowContract(FPMO.hasAllowContract());
406 setApproximateFuncs(FPMO.hasApproxFunc());
407 setAllowReassociation(FPMO.hasAllowReassoc());
408 }
409
410 // These are mutators for each flag.
411 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
412 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
413 void setExact(bool b) { Exact = b; }
414 void setNoNaNs(bool b) { NoNaNs = b; }
415 void setNoInfs(bool b) { NoInfs = b; }
416 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
417 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
418 void setAllowContract(bool b) { AllowContract = b; }
419 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
420 void setAllowReassociation(bool b) { AllowReassociation = b; }
421 void setNoFPExcept(bool b) { NoFPExcept = b; }
422
423 // These are accessors for each flag.
424 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
425 bool hasNoSignedWrap() const { return NoSignedWrap; }
426 bool hasExact() const { return Exact; }
427 bool hasNoNaNs() const { return NoNaNs; }
428 bool hasNoInfs() const { return NoInfs; }
429 bool hasNoSignedZeros() const { return NoSignedZeros; }
430 bool hasAllowReciprocal() const { return AllowReciprocal; }
431 bool hasAllowContract() const { return AllowContract; }
432 bool hasApproximateFuncs() const { return ApproximateFuncs; }
433 bool hasAllowReassociation() const { return AllowReassociation; }
434 bool hasNoFPExcept() const { return NoFPExcept; }
435
436 /// Clear any flags in this flag set that aren't also set in Flags. All
437 /// flags will be cleared if Flags are undefined.
438 void intersectWith(const SDNodeFlags Flags) {
439 NoUnsignedWrap &= Flags.NoUnsignedWrap;
440 NoSignedWrap &= Flags.NoSignedWrap;
441 Exact &= Flags.Exact;
442 NoNaNs &= Flags.NoNaNs;
443 NoInfs &= Flags.NoInfs;
444 NoSignedZeros &= Flags.NoSignedZeros;
445 AllowReciprocal &= Flags.AllowReciprocal;
446 AllowContract &= Flags.AllowContract;
447 ApproximateFuncs &= Flags.ApproximateFuncs;
448 AllowReassociation &= Flags.AllowReassociation;
449 NoFPExcept &= Flags.NoFPExcept;
450 }
451};
452
453/// Represents one node in the SelectionDAG.
454///
455class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
456private:
457 /// The operation that this node performs.
458 int16_t NodeType;
459
460protected:
461 // We define a set of mini-helper classes to help us interpret the bits in our
462 // SubclassData. These are designed to fit within a uint16_t so they pack
463 // with NodeType.
464
465#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
466// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
467// and give the `pack` pragma push semantics.
468#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
469#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
470#else
471#define BEGIN_TWO_BYTE_PACK()
472#define END_TWO_BYTE_PACK()
473#endif
474
475BEGIN_TWO_BYTE_PACK()
476 class SDNodeBitfields {
477 friend class SDNode;
478 friend class MemIntrinsicSDNode;
479 friend class MemSDNode;
480 friend class SelectionDAG;
481
482 uint16_t HasDebugValue : 1;
483 uint16_t IsMemIntrinsic : 1;
484 uint16_t IsDivergent : 1;
485 };
486 enum { NumSDNodeBits = 3 };
487
488 class ConstantSDNodeBitfields {
489 friend class ConstantSDNode;
490
491 uint16_t : NumSDNodeBits;
492
493 uint16_t IsOpaque : 1;
494 };
495
496 class MemSDNodeBitfields {
497 friend class MemSDNode;
498 friend class MemIntrinsicSDNode;
499 friend class AtomicSDNode;
500
501 uint16_t : NumSDNodeBits;
502
503 uint16_t IsVolatile : 1;
504 uint16_t IsNonTemporal : 1;
505 uint16_t IsDereferenceable : 1;
506 uint16_t IsInvariant : 1;
507 };
508 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
509
510 class LSBaseSDNodeBitfields {
511 friend class LSBaseSDNode;
512 friend class MaskedLoadStoreSDNode;
513 friend class MaskedGatherScatterSDNode;
514
515 uint16_t : NumMemSDNodeBits;
516
517 // This storage is shared between disparate class hierarchies to hold an
518 // enumeration specific to the class hierarchy in use.
519 // LSBaseSDNode => enum ISD::MemIndexedMode
520 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
521 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
522 uint16_t AddressingMode : 3;
523 };
524 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
525
526 class LoadSDNodeBitfields {
527 friend class LoadSDNode;
528 friend class MaskedLoadSDNode;
529 friend class MaskedGatherSDNode;
530
531 uint16_t : NumLSBaseSDNodeBits;
532
533 uint16_t ExtTy : 2; // enum ISD::LoadExtType
534 uint16_t IsExpanding : 1;
535 };
536
537 class StoreSDNodeBitfields {
538 friend class StoreSDNode;
539 friend class MaskedStoreSDNode;
540 friend class MaskedScatterSDNode;
541
542 uint16_t : NumLSBaseSDNodeBits;
543
544 uint16_t IsTruncating : 1;
545 uint16_t IsCompressing : 1;
546 };
547
548 union {
549 char RawSDNodeBits[sizeof(uint16_t)];
550 SDNodeBitfields SDNodeBits;
551 ConstantSDNodeBitfields ConstantSDNodeBits;
552 MemSDNodeBitfields MemSDNodeBits;
553 LSBaseSDNodeBitfields LSBaseSDNodeBits;
554 LoadSDNodeBitfields LoadSDNodeBits;
555 StoreSDNodeBitfields StoreSDNodeBits;
556 };
557END_TWO_BYTE_PACK()
558#undef BEGIN_TWO_BYTE_PACK
559#undef END_TWO_BYTE_PACK
560
561 // RawSDNodeBits must cover the entirety of the union. This means that all of
562 // the union's members must have size <= RawSDNodeBits. We write the RHS as
563 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
564 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
565 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
566 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
567 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
568 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
569 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
570
571private:
572 friend class SelectionDAG;
573 // TODO: unfriend HandleSDNode once we fix its operand handling.
574 friend class HandleSDNode;
575
576 /// Unique id per SDNode in the DAG.
577 int NodeId = -1;
578
579 /// The values that are used by this operation.
580 SDUse *OperandList = nullptr;
581
582 /// The types of the values this node defines. SDNode's may
583 /// define multiple values simultaneously.
584 const EVT *ValueList;
585
586 /// List of uses for this SDNode.
587 SDUse *UseList = nullptr;
588
589 /// The number of entries in the Operand/Value list.
590 unsigned short NumOperands = 0;
591 unsigned short NumValues;
592
593 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
594 // original LLVM instructions.
595 // This is used for turning off scheduling, because we'll forgo
596 // the normal scheduling algorithms and output the instructions according to
597 // this ordering.
598 unsigned IROrder;
599
600 /// Source line information.
601 DebugLoc debugLoc;
602
603 /// Return a pointer to the specified value type.
604 static const EVT *getValueTypeList(EVT VT);
605
606 SDNodeFlags Flags;
607
608public:
609 /// Unique and persistent id per SDNode in the DAG.
610 /// Used for debug printing.
611 uint16_t PersistentId;
612
613 //===--------------------------------------------------------------------===//
614 // Accessors
615 //
616
617 /// Return the SelectionDAG opcode value for this node. For
618 /// pre-isel nodes (those for which isMachineOpcode returns false), these
619 /// are the opcode values in the ISD and <target>ISD namespaces. For
620 /// post-isel opcodes, see getMachineOpcode.
621 unsigned getOpcode() const { return (unsigned short)NodeType; }
622
623 /// Test if this node has a target-specific opcode (in the
624 /// \<target\>ISD namespace).
625 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
626
627 /// Test if this node has a target-specific opcode that may raise
628 /// FP exceptions (in the \<target\>ISD namespace and greater than
629 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
630 /// opcodes are currently automatically considered to possibly raise
631 /// FP exceptions as well.
632 bool isTargetStrictFPOpcode() const {
633 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
634 }
635
636 /// Test if this node has a target-specific
637 /// memory-referencing opcode (in the \<target\>ISD namespace and
638 /// greater than FIRST_TARGET_MEMORY_OPCODE).
639 bool isTargetMemoryOpcode() const {
640 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
641 }
642
643 /// Return true if this node represents an undefined value (ISD::UNDEF).
644 bool isUndef() const { return NodeType == ISD::UNDEF; }
645
646 /// Test if this node is a memory intrinsic (with valid pointer information).
647 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
648 /// non-memory intrinsics (with chains) that are not really instances of
649 /// MemSDNode. For such nodes, we need some extra state to determine the
650 /// proper classof relationship.
651 bool isMemIntrinsic() const {
652 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
653 NodeType == ISD::INTRINSIC_VOID) &&
654 SDNodeBits.IsMemIntrinsic;
655 }
656
657 /// Test if this node is a strict floating point pseudo-op.
658 bool isStrictFPOpcode() {
659 switch (NodeType) {
660 default:
661 return false;
662 case ISD::STRICT_FP16_TO_FP:
663 case ISD::STRICT_FP_TO_FP16:
664#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
665 case ISD::STRICT_##DAGN:
666#include "llvm/IR/ConstrainedOps.def"
667 return true;
668 }
669 }
670
671 /// Test if this node has a post-isel opcode, directly
672 /// corresponding to a MachineInstr opcode.
673 bool isMachineOpcode() const { return NodeType < 0; }
674
675 /// This may only be called if isMachineOpcode returns
676 /// true. It returns the MachineInstr opcode value that the node's opcode
677 /// corresponds to.
678 unsigned getMachineOpcode() const {
679 assert(isMachineOpcode() && "Not a MachineInstr opcode!")((void)0);
680 return ~NodeType;
681 }
682
683 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
684 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
685
686 bool isDivergent() const { return SDNodeBits.IsDivergent; }
687
688 /// Return true if there are no uses of this node.
689 bool use_empty() const { return UseList == nullptr; }
690
691 /// Return true if there is exactly one use of this node.
692 bool hasOneUse() const { return hasSingleElement(uses()); }
693
694 /// Return the number of uses of this node. This method takes
695 /// time proportional to the number of uses.
696 size_t use_size() const { return std::distance(use_begin(), use_end()); }
697
698 /// Return the unique node id.
699 int getNodeId() const { return NodeId; }
700
701 /// Set unique node id.
702 void setNodeId(int Id) { NodeId = Id; }
703
704 /// Return the node ordering.
705 unsigned getIROrder() const { return IROrder; }
706
707 /// Set the node ordering.
708 void setIROrder(unsigned Order) { IROrder = Order; }
709
710 /// Return the source location info.
711 const DebugLoc &getDebugLoc() const { return debugLoc; }
712
713 /// Set source location info. Try to avoid this, putting
714 /// it in the constructor is preferable.
715 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
716
717 /// This class provides iterator support for SDUse
718 /// operands that use a specific SDNode.
719 class use_iterator {
720 friend class SDNode;
721
722 SDUse *Op = nullptr;
723
724 explicit use_iterator(SDUse *op) : Op(op) {}
725
726 public:
727 using iterator_category = std::forward_iterator_tag;
728 using value_type = SDUse;
729 using difference_type = std::ptrdiff_t;
730 using pointer = value_type *;
731 using reference = value_type &;
732
733 use_iterator() = default;
734 use_iterator(const use_iterator &I) : Op(I.Op) {}
735
736 bool operator==(const use_iterator &x) const {
737 return Op == x.Op;
738 }
739 bool operator!=(const use_iterator &x) const {
740 return !operator==(x);
741 }
742
743 /// Return true if this iterator is at the end of uses list.
744 bool atEnd() const { return Op == nullptr; }
745
746 // Iterator traversal: forward iteration only.
747 use_iterator &operator++() { // Preincrement
748 assert(Op && "Cannot increment end iterator!")((void)0);
749 Op = Op->getNext();
750 return *this;
751 }
752
753 use_iterator operator++(int) { // Postincrement
754 use_iterator tmp = *this; ++*this; return tmp;
755 }
756
757 /// Retrieve a pointer to the current user node.
758 SDNode *operator*() const {
759 assert(Op && "Cannot dereference end iterator!")((void)0);
760 return Op->getUser();
761 }
762
763 SDNode *operator->() const { return operator*(); }
764
765 SDUse &getUse() const { return *Op; }
766
767 /// Retrieve the operand # of this use in its user.
768 unsigned getOperandNo() const {
769 assert(Op && "Cannot dereference end iterator!")((void)0);
770 return (unsigned)(Op - Op->getUser()->OperandList);
771 }
772 };
773
774 /// Provide iteration support to walk over all uses of an SDNode.
775 use_iterator use_begin() const {
776 return use_iterator(UseList);
777 }
778
779 static use_iterator use_end() { return use_iterator(nullptr); }
780
781 inline iterator_range<use_iterator> uses() {
782 return make_range(use_begin(), use_end());
783 }
784 inline iterator_range<use_iterator> uses() const {
785 return make_range(use_begin(), use_end());
786 }
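A minimal usage sketch (not part of this header), assuming N is a valid SDNode: walking every user of N and recording which operand slot refers back to it.
  for (SDNode::use_iterator UI = N->use_begin(), UE = SDNode::use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;                 // node that consumes one of N's values
    unsigned OpNo = UI.getOperandNo();  // operand index of User that holds N
    (void)User; (void)OpNo;
  }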
787
788 /// Return true if there are exactly NUSES uses of the indicated value.
789 /// This method ignores uses of other values defined by this operation.
790 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
791
792 /// Return true if there are any uses of the indicated value.
793 /// This method ignores uses of other values defined by this operation.
794 bool hasAnyUseOfValue(unsigned Value) const;
795
796 /// Return true if this node is the only use of N.
797 bool isOnlyUserOf(const SDNode *N) const;
798
799 /// Return true if this node is an operand of N.
800 bool isOperandOf(const SDNode *N) const;
801
802 /// Return true if this node is a predecessor of N.
803 /// NOTE: Implemented on top of hasPredecessor and every bit as
804 /// expensive. Use carefully.
805 bool isPredecessorOf(const SDNode *N) const {
806 return N->hasPredecessor(this);
807 }
808
809 /// Return true if N is a predecessor of this node.
810 /// N is either an operand of this node, or can be reached by recursively
811 /// traversing up the operands.
812 /// NOTE: This is an expensive method. Use it carefully.
813 bool hasPredecessor(const SDNode *N) const;
814
815 /// Returns true if N is a predecessor of any node in Worklist. This
816 /// helper keeps Visited and Worklist external so that searches over unions of
817 /// nodes can share work, results can be cached across queries, and nodes can
818 /// be added to Worklist incrementally. It stops early once N is found, and a
819 /// later call can resume the search. Remember to clear Visited and Worklist
820 /// if the DAG changes. MaxSteps gives a maximum number of nodes to visit
821 /// before giving up. The TopologicalPrune flag signals that positive NodeIds
822 /// are topologically ordered (operands have strictly smaller node ids) and
823 /// the search can be pruned using this property.
824 static bool hasPredecessorHelper(const SDNode *N,
825 SmallPtrSetImpl<const SDNode *> &Visited,
826 SmallVectorImpl<const SDNode *> &Worklist,
827 unsigned int MaxSteps = 0,
828 bool TopologicalPrune = false) {
829 SmallVector<const SDNode *, 8> DeferredNodes;
830 if (Visited.count(N))
831 return true;
832
833 // Node Ids are assigned in three places: as a topological
834 // ordering (> 0), during legalization (results in values set to
835 // 0), and for new nodes (set to -1). If N has a topological id then we
836 // know that all nodes with smaller ids cannot be its
837 // successors and need not be checked. Such nodes are filtered out
838 // and re-added to the worklist before exit, in case of multiple calls.
839 // Note that during selection the topological id
840 // may be violated if a node's predecessor is selected before it. We mark
841 // this at selection by negating the id of unselected successors and
842 // restricting topological pruning to positive ids.
843
844 int NId = N->getNodeId();
845 // If the Id was invalidated (negated), reconstruct the original NId.
846 if (NId < -1)
847 NId = -(NId + 1);
848
849 bool Found = false;
850 while (!Worklist.empty()) {
851 const SDNode *M = Worklist.pop_back_val();
852 int MId = M->getNodeId();
853 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
854 (MId > 0) && (MId < NId)) {
855 DeferredNodes.push_back(M);
856 continue;
857 }
858 for (const SDValue &OpV : M->op_values()) {
859 SDNode *Op = OpV.getNode();
860 if (Visited.insert(Op).second)
861 Worklist.push_back(Op);
862 if (Op == N)
863 Found = true;
864 }
865 if (Found)
866 break;
867 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
868 break;
869 }
870 // Push deferred nodes back on worklist.
871 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
872 // If we bailed out early, conservatively report that N was found.
873 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
874 return true;
875 return Found;
876 }
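A minimal usage sketch (not part of this header), assuming Root and N are valid SDNodes: querying whether N is a predecessor of Root while keeping the Visited/Worklist sets external so later queries can reuse them.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(Root);
  bool Reaches = SDNode::hasPredecessorHelper(N, Visited, Worklist,
                                              /*MaxSteps=*/8192);
  (void)Reaches;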
877
878 /// Return true if all the users of N are contained in Nodes.
879 /// NOTE: Requires N to have at least one use, but does not require every
879 /// node in Nodes to be a user.
880 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
881
882 /// Return the number of values used by this operation.
883 unsigned getNumOperands() const { return NumOperands; }
884
885 /// Return the maximum number of operands that a SDNode can hold.
886 static constexpr size_t getMaxNumOperands() {
887 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
888 }
889
890 /// Helper method returns the integer value of a ConstantSDNode operand.
891 inline uint64_t getConstantOperandVal(unsigned Num) const;
892
893 /// Helper method returns the APInt of a ConstantSDNode operand.
894 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
895
896 const SDValue &getOperand(unsigned Num) const {
897 assert(Num < NumOperands && "Invalid child # of SDNode!")((void)0);
898 return OperandList[Num];
899 }
900
901 using op_iterator = SDUse *;
902
903 op_iterator op_begin() const { return OperandList; }
904 op_iterator op_end() const { return OperandList+NumOperands; }
905 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
906
907 /// Iterator for directly iterating over the operand SDValue's.
908 struct value_op_iterator
909 : iterator_adaptor_base<value_op_iterator, op_iterator,
910 std::random_access_iterator_tag, SDValue,
911 ptrdiff_t, value_op_iterator *,
912 value_op_iterator *> {
913 explicit value_op_iterator(SDUse *U = nullptr)
914 : iterator_adaptor_base(U) {}
915
916 const SDValue &operator*() const { return I->get(); }
917 };
918
919 iterator_range<value_op_iterator> op_values() const {
920 return make_range(value_op_iterator(op_begin()),
921 value_op_iterator(op_end()));
922 }
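A minimal usage sketch (not part of this header), assuming N is a valid SDNode: iterating the operand SDValues directly instead of the SDUse list.
  unsigned NumChains = 0;
  for (const SDValue &Op : N->op_values())
    if (Op.getValueType() == MVT::Other)
      ++NumChains;  // MVT::Other operands are chain edges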
923
924 SDVTList getVTList() const {
925 SDVTList X = { ValueList, NumValues };
926 return X;
927 }
928
929 /// If this node has a glue operand, return the node
930 /// to which the glue operand points. Otherwise return NULL.
931 SDNode *getGluedNode() const {
932 if (getNumOperands() != 0 &&
933 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
934 return getOperand(getNumOperands()-1).getNode();
935 return nullptr;
936 }
937
938 /// If this node has a glue value with a user, return
939 /// the user (there is at most one). Otherwise return NULL.
940 SDNode *getGluedUser() const {
941 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
942 if (UI.getUse().get().getValueType() == MVT::Glue)
943 return *UI;
944 return nullptr;
945 }
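A minimal usage sketch (not part of this header), assuming N is a valid SDNode: following the glue edges on either side of N.
  if (SDNode *GlueProducer = N->getGluedNode())
    (void)GlueProducer;  // node whose MVT::Glue result is N's last operand
  if (SDNode *GlueConsumer = N->getGluedUser())
    (void)GlueConsumer;  // the (at most one) user of N's MVT::Glue result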
946
947 SDNodeFlags getFlags() const { return Flags; }
948 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
949
950 /// Clear any flags in this node that aren't also set in Flags.
951 /// If Flags is not in a defined state then this has no effect.
952 void intersectFlagsWith(const SDNodeFlags Flags);
953
954 /// Return the number of values defined/returned by this operator.
955 unsigned getNumValues() const { return NumValues; }
956
957 /// Return the type of a specified result.
958 EVT getValueType(unsigned ResNo) const {
959 assert(ResNo < NumValues && "Illegal result number!")((void)0);
960 return ValueList[ResNo];
961 }
962
963 /// Return the type of a specified result as a simple type.
964 MVT getSimpleValueType(unsigned ResNo) const {
965 return getValueType(ResNo).getSimpleVT();
966 }
967
968 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
969 ///
970 /// If the value type is a scalable vector type, the scalable property will
971 /// be set and the runtime size will be a positive integer multiple of the
972 /// base size.
973 TypeSize getValueSizeInBits(unsigned ResNo) const {
974 return getValueType(ResNo).getSizeInBits();
975 }
976
977 using value_iterator = const EVT *;
978
979 value_iterator value_begin() const { return ValueList; }
980 value_iterator value_end() const { return ValueList+NumValues; }
981 iterator_range<value_iterator> values() const {
982 return llvm::make_range(value_begin(), value_end());
983 }
984
985 /// Return the opcode of this operation for printing.
986 std::string getOperationName(const SelectionDAG *G = nullptr) const;
987 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
988 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
989 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
990 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
991 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
992
993 /// Print a SelectionDAG node and all children down to
994 /// the leaves. The given SelectionDAG allows target-specific nodes
995 /// to be printed in human-readable form. Unlike printr, this will
996 /// print the whole DAG, including children that appear multiple
997 /// times.
998 ///
999 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1000
1001 /// Print a SelectionDAG node and children up to
1002 /// depth "depth." The given SelectionDAG allows target-specific
1003 /// nodes to be printed in human-readable form. Unlike printr, this
1004 /// will print children that appear multiple times wherever they are
1005 /// used.
1006 ///
1007 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1008 unsigned depth = 100) const;
1009
1010 /// Dump this node, for debugging.
1011 void dump() const;
1012
1013 /// Dump (recursively) this node and its use-def subgraph.
1014 void dumpr() const;
1015
1016 /// Dump this node, for debugging.
1017 /// The given SelectionDAG allows target-specific nodes to be printed
1018 /// in human-readable form.
1019 void dump(const SelectionDAG *G) const;
1020
1021 /// Dump (recursively) this node and its use-def subgraph.
1022 /// The given SelectionDAG allows target-specific nodes to be printed
1023 /// in human-readable form.
1024 void dumpr(const SelectionDAG *G) const;
1025
1026 /// printrFull to dbgs(). The given SelectionDAG allows
1027 /// target-specific nodes to be printed in human-readable form.
1028 /// Unlike dumpr, this will print the whole DAG, including children
1029 /// that appear multiple times.
1030 void dumprFull(const SelectionDAG *G = nullptr) const;
1031
1032 /// printrWithDepth to dbgs(). The given
1033 /// SelectionDAG allows target-specific nodes to be printed in
1034 /// human-readable form. Unlike dumpr, this will print children
1035 /// that appear multiple times wherever they are used.
1036 ///
1037 void dumprWithDepth(const SelectionDAG *G = nullptr,
1038 unsigned depth = 100) const;
1039
1040 /// Gather unique data for the node.
1041 void Profile(FoldingSetNodeID &ID) const;
1042
1043 /// This method should only be used by the SDUse class.
1044 void addUse(SDUse &U) { U.addToList(&UseList); }
1045
1046protected:
1047 static SDVTList getSDVTList(EVT VT) {
1048 SDVTList Ret = { getValueTypeList(VT), 1 };
1049 return Ret;
1050 }
1051
1052 /// Create an SDNode.
1053 ///
1054 /// SDNodes are created without any operands, and never own the operand
1055 /// storage. To add operands, see SelectionDAG::createOperands.
1056 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1057 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1058 IROrder(Order), debugLoc(std::move(dl)) {
1059 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1060 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor")((void)0);
1061 assert(NumValues == VTs.NumVTs &&((void)0)
1062 "NumValues wasn't wide enough for its operands!")((void)0);
1063 }
1064
1065 /// Release the operands and set this node to have zero operands.
1066 void DropOperands();
1067};
1068
1069/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1070/// into SDNode creation functions.
1071/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1072/// from the original Instruction, and IROrder is the ordinal position of
1073/// the instruction.
1074/// When an SDNode is created after the DAG has been built, both DebugLoc and
1075/// the IROrder are propagated from the original SDNode.
1076/// So the SDLoc class provides two constructors besides the default one: one
1077/// to be used by the DAGBuilder, the other to be used by everything else.
1078class SDLoc {
1079private:
1080 DebugLoc DL;
1081 int IROrder = 0;
1082
1083public:
1084 SDLoc() = default;
1085 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1086 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1087 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1088 assert(Order >= 0 && "bad IROrder")((void)0);
1089 if (I)
1090 DL = I->getDebugLoc();
1091 }
1092
1093 unsigned getIROrder() const { return IROrder; }
1094 const DebugLoc &getDebugLoc() const { return DL; }
1095};
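A minimal sketch (not part of this header) of the two non-default SDLoc constructors described above; I and Order are assumed to come from the DAG builder, and Existing is an already-built node.
  SDLoc BuilderLoc(I, Order);     // DebugLoc taken from the IR Instruction
  SDLoc PropagatedLoc(Existing);  // DebugLoc and IROrder copied from the SDNode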
1096
1097// Define inline functions from the SDValue class.
1098
1099inline SDValue::SDValue(SDNode *node, unsigned resno)
1100 : Node(node), ResNo(resno) {
1101 // Explicitly check for !ResNo to avoid use-after-free, because there are
1102 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1103 // combines.
1104 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&((void)0)
1105 "Invalid result number for the given node!")((void)0);
1106 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.")((void)0);
1107}
1108
1109inline unsigned SDValue::getOpcode() const {
1110 return Node->getOpcode();
1111}
1112
1113inline EVT SDValue::getValueType() const {
1114 return Node->getValueType(ResNo);
1115}
1116
1117inline unsigned SDValue::getNumOperands() const {
1118 return Node->getNumOperands();
1119}
1120
1121inline const SDValue &SDValue::getOperand(unsigned i) const {
1122 return Node->getOperand(i);
1123}
1124
1125inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1126 return Node->getConstantOperandVal(i);
1127}
1128
1129inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1130 return Node->getConstantOperandAPInt(i);
1131}
1132
1133inline bool SDValue::isTargetOpcode() const {
1134 return Node->isTargetOpcode();
1135}
1136
1137inline bool SDValue::isTargetMemoryOpcode() const {
1138 return Node->isTargetMemoryOpcode();
1139}
1140
1141inline bool SDValue::isMachineOpcode() const {
1142 return Node->isMachineOpcode();
1143}
1144
1145inline unsigned SDValue::getMachineOpcode() const {
1146 return Node->getMachineOpcode();
1147}
1148
1149inline bool SDValue::isUndef() const {
1150 return Node->isUndef();
1151}
1152
1153inline bool SDValue::use_empty() const {
1154 return !Node->hasAnyUseOfValue(ResNo);
1155}
1156
1157inline bool SDValue::hasOneUse() const {
1158 return Node->hasNUsesOfValue(1, ResNo);
1159}
1160
1161inline const DebugLoc &SDValue::getDebugLoc() const {
1162 return Node->getDebugLoc();
1163}
1164
1165inline void SDValue::dump() const {
1166 return Node->dump();
1167}
1168
1169inline void SDValue::dump(const SelectionDAG *G) const {
1170 return Node->dump(G);
1171}
1172
1173inline void SDValue::dumpr() const {
1174 return Node->dumpr();
1175}
1176
1177inline void SDValue::dumpr(const SelectionDAG *G) const {
1178 return Node->dumpr(G);
1179}
1180
1181// Define inline functions from the SDUse class.
1182
1183inline void SDUse::set(const SDValue &V) {
1184 if (Val.getNode()) removeFromList();
1185 Val = V;
1186 if (V.getNode()) V.getNode()->addUse(*this);
1187}
1188
1189inline void SDUse::setInitial(const SDValue &V) {
1190 Val = V;
1191 V.getNode()->addUse(*this);
1192}
1193
1194inline void SDUse::setNode(SDNode *N) {
1195 if (Val.getNode()) removeFromList();
1196 Val.setNode(N);
1197 if (N) N->addUse(*this);
1198}
1199
1200/// This class is used to form a handle around another node that
1201/// is persistent and is updated across invocations of replaceAllUsesWith on its
1202/// operand. This node should be directly created by end-users and not added to
1203/// the AllNodes list.
1204class HandleSDNode : public SDNode {
1205 SDUse Op;
1206
1207public:
1208 explicit HandleSDNode(SDValue X)
1209 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1210 // HandleSDNodes are never inserted into the DAG, so they won't be
1211 // auto-numbered. Use ID 65535 as a sentinel.
1212 PersistentId = 0xffff;
1213
1214 // Manually set up the operand list. This node type is special in that it's
1215 // always stack allocated and SelectionDAG does not manage its operands.
1216 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1217 // be so special.
1218 Op.setUser(this);
1219 Op.setInitial(X);
1220 NumOperands = 1;
1221 OperandList = &Op;
1222 }
1223 ~HandleSDNode();
1224
1225 const SDValue &getValue() const { return Op; }
1226};
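A minimal usage sketch (not part of this header), assuming V is an SDValue that may be replaced: a stack-allocated HandleSDNode keeps the value reachable across replaceAllUsesWith, and getValue() reflects any replacement.
  HandleSDNode Handle(V);
  // ... DAG mutations that may RAUW the node producing V ...
  SDValue Updated = Handle.getValue();
  (void)Updated;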
1227
1228class AddrSpaceCastSDNode : public SDNode {
1229private:
1230 unsigned SrcAddrSpace;
1231 unsigned DestAddrSpace;
1232
1233public:
1234 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1235 unsigned SrcAS, unsigned DestAS);
1236
1237 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1238 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1239
1240 static bool classof(const SDNode *N) {
1241 return N->getOpcode() == ISD::ADDRSPACECAST;
1242 }
1243};
1244
1245/// This is an abstract virtual class for memory operations.
1246class MemSDNode : public SDNode {
1247private:
1248 // VT of in-memory value.
1249 EVT MemoryVT;
1250
1251protected:
1252 /// Memory reference information.
1253 MachineMemOperand *MMO;
1254
1255public:
1256 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1257 EVT memvt, MachineMemOperand *MMO);
1258
1259 bool readMem() const { return MMO->isLoad(); }
1260 bool writeMem() const { return MMO->isStore(); }
1261
1262 /// Returns the alignment of the memory access.
1263 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1264 Align getAlign() const { return MMO->getAlign(); }
1265 // FIXME: Remove once transition to getAlign is over.
1266 unsigned getAlignment() const { return MMO->getAlign().value(); }
1267
1268 /// Return the SubclassData value, without HasDebugValue. This contains an
1269 /// encoding of the volatile flag, as well as bits used by subclasses. This
1270 /// function should only be used to compute a FoldingSetNodeID value.
1271 /// The HasDebugValue bit is masked out because the CSE map needs to match
1272 /// nodes with debug info against nodes without debug info. The same applies
1273 /// to the IsDivergent bit.
1274 unsigned getRawSubclassData() const {
1275 uint16_t Data;
1276 union {
1277 char RawSDNodeBits[sizeof(uint16_t)];
1278 SDNodeBitfields SDNodeBits;
1279 };
1280 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1281 SDNodeBits.HasDebugValue = 0;
1282 SDNodeBits.IsDivergent = false;
1283 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1284 return Data;
1285 }
1286
1287 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1288 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1289 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1290 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1291
1292 // Returns the offset from the location of the access.
1293 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1294
1295 /// Returns the AA info that describes the dereference.
1296 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1297
1299 /// Returns the Ranges that describe the dereference.
1299 const MDNode *getRanges() const { return MMO->getRanges(); }
1300
1301 /// Returns the synchronization scope ID for this memory operation.
1302 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1303
1304 /// Return the atomic ordering requirements for this memory operation. For
1305 /// cmpxchg atomic operations, return the atomic ordering requirements when
1306 /// the store occurs.
1307 AtomicOrdering getSuccessOrdering() const {
1308 return MMO->getSuccessOrdering();
1309 }
1310
1311 /// Return a single atomic ordering that is at least as strong as both the
1312 /// success and failure orderings for an atomic operation. (For operations
1313 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1314 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1315
1316 /// Return true if the memory operation ordering is Unordered or higher.
1317 bool isAtomic() const { return MMO->isAtomic(); }
1318
1319 /// Returns true if the memory operation doesn't imply any ordering
1320 /// constraints on surrounding memory operations beyond the normal memory
1321 /// aliasing rules.
1322 bool isUnordered() const { return MMO->isUnordered(); }
1323
1324 /// Returns true if the memory operation is neither atomic nor volatile.
1325 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1326
1327 /// Return the type of the in-memory value.
1328 EVT getMemoryVT() const { return MemoryVT; }
1329
1330 /// Return a MachineMemOperand object describing the memory
1331 /// reference performed by operation.
1332 MachineMemOperand *getMemOperand() const { return MMO; }
1333
1334 const MachinePointerInfo &getPointerInfo() const {
1335 return MMO->getPointerInfo();
[11] Called C++ object pointer is null
1336 }
1337
1338 /// Return the address space for the associated pointer
1339 unsigned getAddressSpace() const {
1340 return getPointerInfo().getAddrSpace();
1341 }
1342
1343 /// Update this MemSDNode's MachineMemOperand information
1344 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1345 /// This must only be used when the new alignment applies to all users of
1346 /// this MachineMemOperand.
1347 void refineAlignment(const MachineMemOperand *NewMMO) {
1348 MMO->refineAlignment(NewMMO);
1349 }
1350
1351 const SDValue &getChain() const { return getOperand(0); }
1352
1353 const SDValue &getBasePtr() const {
1354 switch (getOpcode()) {
1355 case ISD::STORE:
1356 case ISD::MSTORE:
1357 return getOperand(2);
1358 case ISD::MGATHER:
1359 case ISD::MSCATTER:
1360 return getOperand(3);
1361 default:
1362 return getOperand(1);
1363 }
1364 }
1365
1366 // Methods to support isa and dyn_cast
1367 static bool classof(const SDNode *N) {
1368 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1369 // with either an intrinsic or a target opcode.
1370 switch (N->getOpcode()) {
1371 case ISD::LOAD:
1372 case ISD::STORE:
1373 case ISD::PREFETCH:
1374 case ISD::ATOMIC_CMP_SWAP:
1375 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1376 case ISD::ATOMIC_SWAP:
1377 case ISD::ATOMIC_LOAD_ADD:
1378 case ISD::ATOMIC_LOAD_SUB:
1379 case ISD::ATOMIC_LOAD_AND:
1380 case ISD::ATOMIC_LOAD_CLR:
1381 case ISD::ATOMIC_LOAD_OR:
1382 case ISD::ATOMIC_LOAD_XOR:
1383 case ISD::ATOMIC_LOAD_NAND:
1384 case ISD::ATOMIC_LOAD_MIN:
1385 case ISD::ATOMIC_LOAD_MAX:
1386 case ISD::ATOMIC_LOAD_UMIN:
1387 case ISD::ATOMIC_LOAD_UMAX:
1388 case ISD::ATOMIC_LOAD_FADD:
1389 case ISD::ATOMIC_LOAD_FSUB:
1390 case ISD::ATOMIC_LOAD:
1391 case ISD::ATOMIC_STORE:
1392 case ISD::MLOAD:
1393 case ISD::MSTORE:
1394 case ISD::MGATHER:
1395 case ISD::MSCATTER:
1396 return true;
1397 default:
1398 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1399 }
1400 }
1401};
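A minimal usage sketch (not part of this header), assuming N is a valid SDNode: MemSDNode::classof makes dyn_cast work for loads, stores, atomics, and memory intrinsics, after which the MachineMemOperand-backed queries above are available.
  if (const auto *Mem = dyn_cast<MemSDNode>(N)) {
    EVT MemVT = Mem->getMemoryVT();   // type of the in-memory value
    Align A = Mem->getAlign();        // alignment from the MachineMemOperand
    bool Simple = Mem->isSimple();    // neither atomic nor volatile
    (void)MemVT; (void)A; (void)Simple;
  }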
1402
1403/// This is an SDNode representing atomic operations.
1404class AtomicSDNode : public MemSDNode {
1405public:
1406 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1407 EVT MemVT, MachineMemOperand *MMO)
1408 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1409 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||((void)0)
1410 MMO->isAtomic()) && "then why are we using an AtomicSDNode?")((void)0);
1411 }
1412
1413 const SDValue &getBasePtr() const { return getOperand(1); }
1414 const SDValue &getVal() const { return getOperand(2); }
1415
1416 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1417 /// otherwise.
1418 bool isCompareAndSwap() const {
1419 unsigned Op = getOpcode();
1420 return Op == ISD::ATOMIC_CMP_SWAP ||
1421 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1422 }
1423
1424 /// For cmpxchg atomic operations, return the atomic ordering requirements
1425 /// when store does not occur.
1426 AtomicOrdering getFailureOrdering() const {
1427 assert(isCompareAndSwap() && "Must be cmpxchg operation")((void)0);
1428 return MMO->getFailureOrdering();
1429 }
1430
1431 // Methods to support isa and dyn_cast
1432 static bool classof(const SDNode *N) {
1433 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1434 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1435 N->getOpcode() == ISD::ATOMIC_SWAP ||
1436 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1437 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1438 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1439 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1440 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1441 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1442 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1443 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1444 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1445 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1446 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1447 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1448 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1449 N->getOpcode() == ISD::ATOMIC_LOAD ||
1450 N->getOpcode() == ISD::ATOMIC_STORE;
1451 }
1452};
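A minimal usage sketch (not part of this header), assuming N is a valid SDNode: identifying a cmpxchg node and reading its failure ordering.
  if (const auto *A = dyn_cast<AtomicSDNode>(N)) {
    if (A->isCompareAndSwap()) {
      AtomicOrdering FailOrd = A->getFailureOrdering();
      (void)FailOrd;
    }
  }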
1453
1454/// This SDNode is used for target intrinsics that touch
1455/// memory and need an associated MachineMemOperand. Its opcode may be
1456/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1457/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1458class MemIntrinsicSDNode : public MemSDNode {
1459public:
1460 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1461 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1462 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1463 SDNodeBits.IsMemIntrinsic = true;
1464 }
1465
1466 // Methods to support isa and dyn_cast
1467 static bool classof(const SDNode *N) {
1468 // We lower some target intrinsics to their target opcode early, so a node
1469 // with a target opcode can be of this class
1470 return N->isMemIntrinsic() ||
1471 N->getOpcode() == ISD::PREFETCH ||
1472 N->isTargetMemoryOpcode();
1473 }
1474};
1475
1476/// This SDNode is used to implement the code generator
1477/// support for the llvm IR shufflevector instruction. It combines elements
1478/// from two input vectors into a new output vector, with the selection and
1479/// ordering of elements determined by an array of integers, referred to as
1480/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1481/// refer to elements from the LHS input, and indices from N to 2N-1 refer to the RHS.
1482/// An index of -1 is treated as undef, such that the code generator may put
1483/// any value in the corresponding element of the result.
1484class ShuffleVectorSDNode : public SDNode {
1485 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1486 // is freed when the SelectionDAG object is destroyed.
1487 const int *Mask;
1488
1489protected:
1490 friend class SelectionDAG;
1491
1492 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1493 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1494
1495public:
1496 ArrayRef<int> getMask() const {
1497 EVT VT = getValueType(0);
1498 return makeArrayRef(Mask, VT.getVectorNumElements());
1499 }
1500
1501 int getMaskElt(unsigned Idx) const {
1502 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")((void)0);
1503 return Mask[Idx];
1504 }
1505
1506 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1507
1508 int getSplatIndex() const {
1509 assert(isSplat() && "Cannot get splat index for non-splat!")((void)0);
1510 EVT VT = getValueType(0);
1511 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1512 if (Mask[i] >= 0)
1513 return Mask[i];
1514
1515 // We can choose any index value here and be correct because all elements
1516 // are undefined. Return 0 for better potential for callers to simplify.
1517 return 0;
1518 }
1519
1520 static bool isSplatMask(const int *Mask, EVT VT);
1521
1522 /// Change values in a shuffle permute mask assuming
1523 /// the two vector operands have swapped position.
1524 static void commuteMask(MutableArrayRef<int> Mask) {
1525 unsigned NumElems = Mask.size();
1526 for (unsigned i = 0; i != NumElems; ++i) {
1527 int idx = Mask[i];
1528 if (idx < 0)
1529 continue;
1530 else if (idx < (int)NumElems)
1531 Mask[i] = idx + NumElems;
1532 else
1533 Mask[i] = idx - NumElems;
1534 }
1535 }
1536
1537 static bool classof(const SDNode *N) {
1538 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1539 }
1540};
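A minimal sketch (not part of this header) of commuteMask on a 4-element shuffle mask: indices below 4 select the LHS and 4..7 the RHS, so swapping the operands maps each in-range index to the other half.
  SmallVector<int, 4> Mask = {0, 5, -1, 3};
  ShuffleVectorSDNode::commuteMask(Mask);  // Mask becomes {4, 1, -1, 7}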
1541
1542class ConstantSDNode : public SDNode {
1543 friend class SelectionDAG;
1544
1545 const ConstantInt *Value;
1546
1547 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1548 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1549 getSDVTList(VT)),
1550 Value(val) {
1551 ConstantSDNodeBits.IsOpaque = isOpaque;
1552 }
1553
1554public:
1555 const ConstantInt *getConstantIntValue() const { return Value; }
1556 const APInt &getAPIntValue() const { return Value->getValue(); }
1557 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1558 int64_t getSExtValue() const { return Value->getSExtValue(); }
1559 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1560 return Value->getLimitedValue(Limit);
1561 }
1562 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1563 Align getAlignValue() const { return Value->getAlignValue(); }
1564
1565 bool isOne() const { return Value->isOne(); }
1566 bool isNullValue() const { return Value->isZero(); }
1567 bool isAllOnesValue() const { return Value->isMinusOne(); }
1568 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1569 bool isMinSignedValue() const { return Value->isMinValue(true); }
1570
1571 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1572
1573 static bool classof(const SDNode *N) {
1574 return N->getOpcode() == ISD::Constant ||
1575 N->getOpcode() == ISD::TargetConstant;
1576 }
1577};
1578
1579uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1580 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1581}
1582
1583const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1584 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1585}
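A minimal usage sketch (not part of this header), assuming N is a node whose operand 1 is known to be a ConstantSDNode (otherwise the cast inside these helpers would assert).
  uint64_t Imm = N->getConstantOperandVal(1);
  const APInt &ImmBits = N->getConstantOperandAPInt(1);
  (void)Imm; (void)ImmBits;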
1586
1587class ConstantFPSDNode : public SDNode {
1588 friend class SelectionDAG;
1589
1590 const ConstantFP *Value;
1591
1592 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1593 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1594 DebugLoc(), getSDVTList(VT)),
1595 Value(val) {}
1596
1597public:
1598 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1599 const ConstantFP *getConstantFPValue() const { return Value; }
1600
1601 /// Return true if the value is positive or negative zero.
1602 bool isZero() const { return Value->isZero(); }
1603
1604 /// Return true if the value is a NaN.
1605 bool isNaN() const { return Value->isNaN(); }
1606
1607 /// Return true if the value is an infinity
1608 bool isInfinity() const { return Value->isInfinity(); }
1609
1610 /// Return true if the value is negative.
1611 bool isNegative() const { return Value->isNegative(); }
1612
1613 /// We don't rely on operator== working on double values, as
1614 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1615 /// As such, this method can be used to do an exact bit-for-bit comparison of
1616 /// two floating point values.
1617
1618 /// We leave the version with the double argument here because it's just so
1619 /// convenient to write "2.0" and the like. Without this function we'd
1620 /// have to duplicate its logic everywhere it's called.
1621 bool isExactlyValue(double V) const {
1622 return Value->getValueAPF().isExactlyValue(V);
1623 }
1624 bool isExactlyValue(const APFloat& V) const;
1625
1626 static bool isValueValidForType(EVT VT, const APFloat& Val);
1627
1628 static bool classof(const SDNode *N) {
1629 return N->getOpcode() == ISD::ConstantFP ||
1630 N->getOpcode() == ISD::TargetConstantFP;
1631 }
1632};
1633
1634/// Returns true if \p V is a constant integer zero.
1635bool isNullConstant(SDValue V);
1636
1637/// Returns true if \p V is an FP constant with a value of positive zero.
1638bool isNullFPConstant(SDValue V);
1639
1640/// Returns true if \p V is an integer constant with all bits set.
1641bool isAllOnesConstant(SDValue V);
1642
1643/// Returns true if \p V is a constant integer one.
1644bool isOneConstant(SDValue V);
1645
1646/// Return the non-bitcasted source operand of \p V if it exists.
1647/// If \p V is not a bitcasted value, it is returned as-is.
1648SDValue peekThroughBitcasts(SDValue V);
1649
1650/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1651/// If \p V is not a bitcasted one-use value, it is returned as-is.
1652SDValue peekThroughOneUseBitcasts(SDValue V);
1653
1654/// Return the non-extracted vector source operand of \p V if it exists.
1655/// If \p V is not an extracted subvector, it is returned as-is.
1656SDValue peekThroughExtractSubvectors(SDValue V);
1657
1658/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1659/// constant is canonicalized to be operand 1.
1660bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1661
1662/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1663ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1664 bool AllowTruncation = false);
1665
1666/// Returns the SDNode if it is a demanded constant splat BuildVector or
1667/// constant int.
1668ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1669 bool AllowUndefs = false,
1670 bool AllowTruncation = false);
1671
1672/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1673ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1674
1675/// Returns the SDNode if it is a demanded constant splat BuildVector or
1676/// constant float.
1677ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1678 bool AllowUndefs = false);
1679
1680/// Return true if the value is a constant 0 integer or a splatted vector of
1681/// a constant 0 integer (with no undefs by default).
1682/// Build vector implicit truncation is not an issue for null values.
1683bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1684
1685/// Return true if the value is a constant 1 integer or a splatted vector of a
1686/// constant 1 integer (with no undefs).
1687/// Does not permit build vector implicit truncation.
1688bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1689
1690/// Return true if the value is a constant -1 integer or a splatted vector of a
1691/// constant -1 integer (with no undefs).
1692/// Does not permit build vector implicit truncation.
1693bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1694
1695/// Return true if \p V is either an integer or an FP constant.
1696inline bool isIntOrFPConstant(SDValue V) {
1697 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1698}
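A minimal usage sketch (not part of this header), assuming V is an SDValue: the common combiner pattern of peeking through bitcasts before testing for a (possibly splatted) constant.
  SDValue Src = peekThroughBitcasts(V);
  bool IsZero = false;
  if (ConstantSDNode *C = isConstOrConstSplat(Src, /*AllowUndefs=*/true))
    IsZero = C->isNullValue();  // Src is zero, or a vector splat of zero
  (void)IsZero;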
1699
1700class GlobalAddressSDNode : public SDNode {
1701 friend class SelectionDAG;
1702
1703 const GlobalValue *TheGlobal;
1704 int64_t Offset;
1705 unsigned TargetFlags;
1706
1707 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1708 const GlobalValue *GA, EVT VT, int64_t o,
1709 unsigned TF);
1710
1711public:
1712 const GlobalValue *getGlobal() const { return TheGlobal; }
1713 int64_t getOffset() const { return Offset; }
1714 unsigned getTargetFlags() const { return TargetFlags; }
1715 // Return the address space this GlobalAddress belongs to.
1716 unsigned getAddressSpace() const;
1717
1718 static bool classof(const SDNode *N) {
1719 return N->getOpcode() == ISD::GlobalAddress ||
1720 N->getOpcode() == ISD::TargetGlobalAddress ||
1721 N->getOpcode() == ISD::GlobalTLSAddress ||
1722 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1723 }
1724};
1725
1726class FrameIndexSDNode : public SDNode {
1727 friend class SelectionDAG;
1728
1729 int FI;
1730
1731 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1732 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1733 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1734 }
1735
1736public:
1737 int getIndex() const { return FI; }
1738
1739 static bool classof(const SDNode *N) {
1740 return N->getOpcode() == ISD::FrameIndex ||
1741 N->getOpcode() == ISD::TargetFrameIndex;
1742 }
1743};
1744
1745/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1746/// the offset and size that are started/ended in the underlying FrameIndex.
1747class LifetimeSDNode : public SDNode {
1748 friend class SelectionDAG;
1749 int64_t Size;
1750 int64_t Offset; // -1 if offset is unknown.
1751
1752 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1753 SDVTList VTs, int64_t Size, int64_t Offset)
1754 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1755public:
1756 int64_t getFrameIndex() const {
1757 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1758 }
1759
1760 bool hasOffset() const { return Offset >= 0; }
1761 int64_t getOffset() const {
1762 assert(hasOffset() && "offset is unknown")((void)0);
1763 return Offset;
1764 }
1765 int64_t getSize() const {
1766 assert(hasOffset() && "offset is unknown")((void)0);
1767 return Size;
1768 }
1769
1770 // Methods to support isa and dyn_cast
1771 static bool classof(const SDNode *N) {
1772 return N->getOpcode() == ISD::LIFETIME_START ||
1773 N->getOpcode() == ISD::LIFETIME_END;
1774 }
1775};
1776
1777/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1778/// the index of the basic block being probed. A pseudo probe serves as a
1779/// placeholder and will be removed at the end of compilation. It has no
1780/// operands because we do not want instruction selection to deal with any.
1781class PseudoProbeSDNode : public SDNode {
1782 friend class SelectionDAG;
1783 uint64_t Guid;
1784 uint64_t Index;
1785 uint32_t Attributes;
1786
1787 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1788 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1789 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1790 Attributes(Attr) {}
1791
1792public:
1793 uint64_t getGuid() const { return Guid; }
1794 uint64_t getIndex() const { return Index; }
1795 uint32_t getAttributes() const { return Attributes; }
1796
1797 // Methods to support isa and dyn_cast
1798 static bool classof(const SDNode *N) {
1799 return N->getOpcode() == ISD::PSEUDO_PROBE;
1800 }
1801};
1802
1803class JumpTableSDNode : public SDNode {
1804 friend class SelectionDAG;
1805
1806 int JTI;
1807 unsigned TargetFlags;
1808
1809 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1810 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1811 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1812 }
1813
1814public:
1815 int getIndex() const { return JTI; }
1816 unsigned getTargetFlags() const { return TargetFlags; }
1817
1818 static bool classof(const SDNode *N) {
1819 return N->getOpcode() == ISD::JumpTable ||
1820 N->getOpcode() == ISD::TargetJumpTable;
1821 }
1822};
1823
1824class ConstantPoolSDNode : public SDNode {
1825 friend class SelectionDAG;
1826
1827 union {
1828 const Constant *ConstVal;
1829 MachineConstantPoolValue *MachineCPVal;
1830 } Val;
1831 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1832 Align Alignment; // Minimum alignment requirement of CP.
1833 unsigned TargetFlags;
1834
1835 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1836 Align Alignment, unsigned TF)
1837 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1838 DebugLoc(), getSDVTList(VT)),
1839 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1840 assert(Offset >= 0 && "Offset is too large")((void)0);
1841 Val.ConstVal = c;
1842 }
1843
1844 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1845 Align Alignment, unsigned TF)
1846 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1847 DebugLoc(), getSDVTList(VT)),
1848 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1849 assert(Offset >= 0 && "Offset is too large")((void)0);
1850 Val.MachineCPVal = v;
1851 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1852 }
1853
1854public:
1855 bool isMachineConstantPoolEntry() const {
1856 return Offset < 0;
1857 }
1858
1859 const Constant *getConstVal() const {
1860 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0);
1861 return Val.ConstVal;
1862 }
1863
1864 MachineConstantPoolValue *getMachineCPVal() const {
1865 assert(isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0);
1866 return Val.MachineCPVal;
1867 }
1868
1869 int getOffset() const {
1870 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1871 }
1872
1873 // Return the alignment of this constant pool object, which is either 0 (for
1874 // default alignment) or the desired value.
1875 Align getAlign() const { return Alignment; }
1876 unsigned getTargetFlags() const { return TargetFlags; }
1877
1878 Type *getType() const;
1879
1880 static bool classof(const SDNode *N) {
1881 return N->getOpcode() == ISD::ConstantPool ||
1882 N->getOpcode() == ISD::TargetConstantPool;
1883 }
1884};
1885
1886/// Completely target-dependent object reference.
1887class TargetIndexSDNode : public SDNode {
1888 friend class SelectionDAG;
1889
1890 unsigned TargetFlags;
1891 int Index;
1892 int64_t Offset;
1893
1894public:
1895 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1896 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1897 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1898
1899 unsigned getTargetFlags() const { return TargetFlags; }
1900 int getIndex() const { return Index; }
1901 int64_t getOffset() const { return Offset; }
1902
1903 static bool classof(const SDNode *N) {
1904 return N->getOpcode() == ISD::TargetIndex;
1905 }
1906};
1907
1908class BasicBlockSDNode : public SDNode {
1909 friend class SelectionDAG;
1910
1911 MachineBasicBlock *MBB;
1912
1913 /// Debug info is meaningful and potentially useful here, but we create
1914 /// blocks out of order when they're jumped to, which makes it a bit
1915 /// harder. Let's see if we need it first.
1916 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1917 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1918 {}
1919
1920public:
1921 MachineBasicBlock *getBasicBlock() const { return MBB; }
1922
1923 static bool classof(const SDNode *N) {
1924 return N->getOpcode() == ISD::BasicBlock;
1925 }
1926};
1927
1928/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1929class BuildVectorSDNode : public SDNode {
1930public:
1931 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1932 explicit BuildVectorSDNode() = delete;
1933
1934 /// Check if this is a constant splat, and if so, find the
1935 /// smallest element size that splats the vector. If MinSplatBits is
1936 /// nonzero, the element size must be at least that large. Note that the
1937 /// splat element may be the entire vector (i.e., a one element vector).
1938 /// Returns the splat element value in SplatValue. Any undefined bits in
1939 /// that value are zero, and the corresponding bits in the SplatUndef mask
1940 /// are set. The SplatBitSize value is set to the splat element size in
1941 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1942 /// undefined. isBigEndian describes the endianness of the target.
1943 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1944 unsigned &SplatBitSize, bool &HasAnyUndefs,
1945 unsigned MinSplatBits = 0,
1946 bool isBigEndian = false) const;
1947
1948 /// Returns the demanded splatted value or a null value if this is not a
1949 /// splat.
1950 ///
1951 /// The DemandedElts mask indicates the elements that must be in the splat.
1952 /// If passed a non-null UndefElements bitvector, it will resize it to match
1953 /// the vector width and set the bits where elements are undef.
1954 SDValue getSplatValue(const APInt &DemandedElts,
1955 BitVector *UndefElements = nullptr) const;
1956
1957 /// Returns the splatted value or a null value if this is not a splat.
1958 ///
1959 /// If passed a non-null UndefElements bitvector, it will resize it to match
1960 /// the vector width and set the bits where elements are undef.
1961 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1962
1963 /// Find the shortest repeating sequence of values in the build vector.
1964 ///
1965 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1966 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1967 ///
1968 /// Currently this must be a power-of-2 build vector.
1969 /// The DemandedElts mask indicates the elements that must be present,
1970 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1971 /// non-null UndefElements bitvector, it will resize it to match the original
1972 /// vector width and set the bits where elements are undef. If result is
1973 /// false, Sequence will be empty.
1974 bool getRepeatedSequence(const APInt &DemandedElts,
1975 SmallVectorImpl<SDValue> &Sequence,
1976 BitVector *UndefElements = nullptr) const;
1977
1978 /// Find the shortest repeating sequence of values in the build vector.
1979 ///
1980 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1981 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1982 ///
1983 /// Currently this must be a power-of-2 build vector.
1984 /// If passed a non-null UndefElements bitvector, it will resize it to match
1985 /// the original vector width and set the bits where elements are undef.
1986 /// If result is false, Sequence will be empty.
1987 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
1988 BitVector *UndefElements = nullptr) const;
1989
1990 /// Returns the demanded splatted constant or null if this is not a constant
1991 /// splat.
1992 ///
1993 /// The DemandedElts mask indicates the elements that must be in the splat.
1994 /// If passed a non-null UndefElements bitvector, it will resize it to match
1995 /// the vector width and set the bits where elements are undef.
1996 ConstantSDNode *
1997 getConstantSplatNode(const APInt &DemandedElts,
1998 BitVector *UndefElements = nullptr) const;
1999
2000 /// Returns the splatted constant or null if this is not a constant
2001 /// splat.
2002 ///
2003 /// If passed a non-null UndefElements bitvector, it will resize it to match
2004 /// the vector width and set the bits where elements are undef.
2005 ConstantSDNode *
2006 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2007
2008 /// Returns the demanded splatted constant FP or null if this is not a
2009 /// constant FP splat.
2010 ///
2011 /// The DemandedElts mask indicates the elements that must be in the splat.
2012 /// If passed a non-null UndefElements bitvector, it will resize it to match
2013 /// the vector width and set the bits where elements are undef.
2014 ConstantFPSDNode *
2015 getConstantFPSplatNode(const APInt &DemandedElts,
2016 BitVector *UndefElements = nullptr) const;
2017
2018 /// Returns the splatted constant FP or null if this is not a constant
2019 /// FP splat.
2020 ///
2021 /// If passed a non-null UndefElements bitvector, it will resize it to match
2022 /// the vector width and set the bits where elements are undef.
2023 ConstantFPSDNode *
2024 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2025
2026 /// If this is a constant FP splat and the splatted constant FP is an
2027 /// exact power of 2, return the log base 2 integer value. Otherwise,
2028 /// return -1.
2029 ///
2030 /// The BitWidth specifies the necessary bit precision.
2031 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2032 uint32_t BitWidth) const;
2033
2034 bool isConstant() const;
2035
2036 static bool classof(const SDNode *N) {
2037 return N->getOpcode() == ISD::BUILD_VECTOR;
2038 }
2039};
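A minimal usage sketch (not part of this header), assuming BV points to a BUILD_VECTOR node: checking for a constant splat of at least 32 bits, as described above.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                          /*MinSplatBits=*/32)) {
    // SplatValue holds the splat element; SplatUndef marks its undefined bits.
  }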
2040
2041/// An SDNode that holds an arbitrary LLVM IR Value. This is
2042/// used when the SelectionDAG needs to make a simple reference to something
2043/// in the LLVM IR representation.
2044///
2045class SrcValueSDNode : public SDNode {
2046 friend class SelectionDAG;
2047
2048 const Value *V;
2049
2050 /// Create a SrcValue for a general value.
2051 explicit SrcValueSDNode(const Value *v)
2052 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2053
2054public:
2055 /// Return the contained Value.
2056 const Value *getValue() const { return V; }
2057
2058 static bool classof(const SDNode *N) {
2059 return N->getOpcode() == ISD::SRCVALUE;
2060 }
2061};
2062
2063class MDNodeSDNode : public SDNode {
2064 friend class SelectionDAG;
2065
2066 const MDNode *MD;
2067
2068 explicit MDNodeSDNode(const MDNode *md)
2069 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2070 {}
2071
2072public:
2073 const MDNode *getMD() const { return MD; }
2074
2075 static bool classof(const SDNode *N) {
2076 return N->getOpcode() == ISD::MDNODE_SDNODE;
2077 }
2078};
2079
2080class RegisterSDNode : public SDNode {
2081 friend class SelectionDAG;
2082
2083 Register Reg;
2084
2085 RegisterSDNode(Register reg, EVT VT)
2086 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2087
2088public:
2089 Register getReg() const { return Reg; }
2090
2091 static bool classof(const SDNode *N) {
2092 return N->getOpcode() == ISD::Register;
2093 }
2094};
2095
2096class RegisterMaskSDNode : public SDNode {
2097 friend class SelectionDAG;
2098
2099 // The memory for RegMask is not owned by the node.
2100 const uint32_t *RegMask;
2101
2102 RegisterMaskSDNode(const uint32_t *mask)
2103 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2104 RegMask(mask) {}
2105
2106public:
2107 const uint32_t *getRegMask() const { return RegMask; }
2108
2109 static bool classof(const SDNode *N) {
2110 return N->getOpcode() == ISD::RegisterMask;
2111 }
2112};
2113
2114class BlockAddressSDNode : public SDNode {
2115 friend class SelectionDAG;
2116
2117 const BlockAddress *BA;
2118 int64_t Offset;
2119 unsigned TargetFlags;
2120
2121 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2122 int64_t o, unsigned Flags)
2123 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2124 BA(ba), Offset(o), TargetFlags(Flags) {}
2125
2126public:
2127 const BlockAddress *getBlockAddress() const { return BA; }
2128 int64_t getOffset() const { return Offset; }
2129 unsigned getTargetFlags() const { return TargetFlags; }
2130
2131 static bool classof(const SDNode *N) {
2132 return N->getOpcode() == ISD::BlockAddress ||
2133 N->getOpcode() == ISD::TargetBlockAddress;
2134 }
2135};
2136
2137class LabelSDNode : public SDNode {
2138 friend class SelectionDAG;
2139
2140 MCSymbol *Label;
2141
2142 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2143 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2144 assert(LabelSDNode::classof(this) && "not a label opcode");
2145 }
2146
2147public:
2148 MCSymbol *getLabel() const { return Label; }
2149
2150 static bool classof(const SDNode *N) {
2151 return N->getOpcode() == ISD::EH_LABEL ||
2152 N->getOpcode() == ISD::ANNOTATION_LABEL;
2153 }
2154};
2155
2156class ExternalSymbolSDNode : public SDNode {
2157 friend class SelectionDAG;
2158
2159 const char *Symbol;
2160 unsigned TargetFlags;
2161
2162 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2163 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2164 DebugLoc(), getSDVTList(VT)),
2165 Symbol(Sym), TargetFlags(TF) {}
2166
2167public:
2168 const char *getSymbol() const { return Symbol; }
2169 unsigned getTargetFlags() const { return TargetFlags; }
2170
2171 static bool classof(const SDNode *N) {
2172 return N->getOpcode() == ISD::ExternalSymbol ||
2173 N->getOpcode() == ISD::TargetExternalSymbol;
2174 }
2175};
2176
2177class MCSymbolSDNode : public SDNode {
2178 friend class SelectionDAG;
2179
2180 MCSymbol *Symbol;
2181
2182 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2183 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2184
2185public:
2186 MCSymbol *getMCSymbol() const { return Symbol; }
2187
2188 static bool classof(const SDNode *N) {
2189 return N->getOpcode() == ISD::MCSymbol;
2190 }
2191};
2192
2193class CondCodeSDNode : public SDNode {
2194 friend class SelectionDAG;
2195
2196 ISD::CondCode Condition;
2197
2198 explicit CondCodeSDNode(ISD::CondCode Cond)
2199 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2200 Condition(Cond) {}
2201
2202public:
2203 ISD::CondCode get() const { return Condition; }
2204
2205 static bool classof(const SDNode *N) {
2206 return N->getOpcode() == ISD::CONDCODE;
2207 }
2208};
2209
2210 /// This class is used to represent EVTs, which are used
2211/// to parameterize some operations.
2212class VTSDNode : public SDNode {
2213 friend class SelectionDAG;
2214
2215 EVT ValueType;
2216
2217 explicit VTSDNode(EVT VT)
2218 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2219 ValueType(VT) {}
2220
2221public:
2222 EVT getVT() const { return ValueType; }
2223
2224 static bool classof(const SDNode *N) {
2225 return N->getOpcode() == ISD::VALUETYPE;
2226 }
2227};
2228
2229/// Base class for LoadSDNode and StoreSDNode
2230class LSBaseSDNode : public MemSDNode {
2231public:
2232 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2233 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2234 MachineMemOperand *MMO)
2235 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2236 LSBaseSDNodeBits.AddressingMode = AM;
2237 assert(getAddressingMode() == AM && "Value truncated");
2238 }
2239
2240 const SDValue &getOffset() const {
2241 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2242 }
2243
2244 /// Return the addressing mode for this load or store:
2245 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2246 ISD::MemIndexedMode getAddressingMode() const {
2247 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2248 }
2249
2250 /// Return true if this is a pre/post inc/dec load/store.
2251 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2252
2253 /// Return true if this is NOT a pre/post inc/dec load/store.
2254 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2255
2256 static bool classof(const SDNode *N) {
2257 return N->getOpcode() == ISD::LOAD ||
2258 N->getOpcode() == ISD::STORE;
2259 }
2260};
2261
2262/// This class is used to represent ISD::LOAD nodes.
2263class LoadSDNode : public LSBaseSDNode {
2264 friend class SelectionDAG;
2265
2266 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2267 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2268 MachineMemOperand *MMO)
2269 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2270 LoadSDNodeBits.ExtTy = ETy;
2271 assert(readMem() && "Load MachineMemOperand is not a load!");
2272 assert(!writeMem() && "Load MachineMemOperand is a store!");
2273 }
2274
2275public:
2276 /// Return whether this is a plain node,
2277 /// or one of the varieties of value-extending loads.
2278 ISD::LoadExtType getExtensionType() const {
2279 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2280 }
2281
2282 const SDValue &getBasePtr() const { return getOperand(1); }
2283 const SDValue &getOffset() const { return getOperand(2); }
2284
2285 static bool classof(const SDNode *N) {
2286 return N->getOpcode() == ISD::LOAD;
2287 }
2288};
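As a hedged usage sketch (the SDValue Op and its producer are assumed, not defined here), a combine that only wants plain, unindexed zero-extending loads would query the node like this:

    if (auto *LD = dyn_cast<LoadSDNode>(Op.getNode())) {
      if (LD->getExtensionType() == ISD::ZEXTLOAD && LD->isUnindexed()) {
        SDValue Chain = LD->getChain();    // operand 0
        SDValue Ptr   = LD->getBasePtr();  // operand 1
        EVT MemVT     = LD->getMemoryVT(); // width actually read from memory
        (void)Chain; (void)Ptr; (void)MemVT;
      }
    }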
2289
2290/// This class is used to represent ISD::STORE nodes.
2291class StoreSDNode : public LSBaseSDNode {
2292 friend class SelectionDAG;
2293
2294 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2295 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2296 MachineMemOperand *MMO)
2297 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2298 StoreSDNodeBits.IsTruncating = isTrunc;
2299 assert(!readMem() && "Store MachineMemOperand is a load!");
2300 assert(writeMem() && "Store MachineMemOperand is not a store!");
2301 }
2302
2303public:
2304 /// Return true if the op does a truncation before store.
2305 /// For integers this is the same as doing a TRUNCATE and storing the result.
2306 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2307 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2308 void setTruncatingStore(bool Truncating) {
2309 StoreSDNodeBits.IsTruncating = Truncating;
2310 }
2311
2312 const SDValue &getValue() const { return getOperand(1); }
2313 const SDValue &getBasePtr() const { return getOperand(2); }
2314 const SDValue &getOffset() const { return getOperand(3); }
2315
2316 static bool classof(const SDNode *N) {
2317 return N->getOpcode() == ISD::STORE;
2318 }
2319};
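The corresponding sketch for stores (again with an assumed SDValue Op); a truncating store narrows getValue() to getMemoryVT() before writing it:

    if (auto *ST = dyn_cast<StoreSDNode>(Op.getNode())) {
      if (ST->isTruncatingStore() && ST->isUnindexed()) {
        SDValue Stored = ST->getValue();    // operand 1
        SDValue Ptr    = ST->getBasePtr();  // operand 2
        EVT MemVT      = ST->getMemoryVT(); // narrower type written to memory
        (void)Stored; (void)Ptr; (void)MemVT;
      }
    }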
2320
2321/// This base class is used to represent MLOAD and MSTORE nodes
2322class MaskedLoadStoreSDNode : public MemSDNode {
2323public:
2324 friend class SelectionDAG;
2325
2326 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2327 const DebugLoc &dl, SDVTList VTs,
2328 ISD::MemIndexedMode AM, EVT MemVT,
2329 MachineMemOperand *MMO)
2330 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2331 LSBaseSDNodeBits.AddressingMode = AM;
2332 assert(getAddressingMode() == AM && "Value truncated");
2333 }
2334
2335 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2336 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2337 // Mask is a vector of i1 elements
2338 const SDValue &getOffset() const {
2339 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2340 }
2341 const SDValue &getMask() const {
2342 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2343 }
2344
2345 /// Return the addressing mode for this load or store:
2346 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2347 ISD::MemIndexedMode getAddressingMode() const {
2348 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2349 }
2350
2351 /// Return true if this is a pre/post inc/dec load/store.
2352 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2353
2354 /// Return true if this is NOT a pre/post inc/dec load/store.
2355 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2356
2357 static bool classof(const SDNode *N) {
2358 return N->getOpcode() == ISD::MLOAD ||
2359 N->getOpcode() == ISD::MSTORE;
2360 }
2361};
2362
2363/// This class is used to represent an MLOAD node
2364class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2365public:
2366 friend class SelectionDAG;
2367
2368 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2369 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2370 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2371 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2372 LoadSDNodeBits.ExtTy = ETy;
2373 LoadSDNodeBits.IsExpanding = IsExpanding;
2374 }
2375
2376 ISD::LoadExtType getExtensionType() const {
2377 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2378 }
2379
2380 const SDValue &getBasePtr() const { return getOperand(1); }
2381 const SDValue &getOffset() const { return getOperand(2); }
2382 const SDValue &getMask() const { return getOperand(3); }
2383 const SDValue &getPassThru() const { return getOperand(4); }
2384
2385 static bool classof(const SDNode *N) {
2386 return N->getOpcode() == ISD::MLOAD;
2387 }
2388
2389 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2390};
2391
2392/// This class is used to represent an MSTORE node
2393class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2394public:
2395 friend class SelectionDAG;
2396
2397 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2398 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2399 EVT MemVT, MachineMemOperand *MMO)
2400 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2401 StoreSDNodeBits.IsTruncating = isTrunc;
2402 StoreSDNodeBits.IsCompressing = isCompressing;
2403 }
2404
2405 /// Return true if the op does a truncation before store.
2406 /// For integers this is the same as doing a TRUNCATE and storing the result.
2407 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2408 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2409
2410 /// Returns true if the op does a compression to the vector before storing.
2411 /// The node contiguously stores the active elements (integers or floats)
2412 /// in src (those with their respective bit set in writemask k) to unaligned
2413 /// memory at base_addr.
2414 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2415
2416 const SDValue &getValue() const { return getOperand(1); }
2417 const SDValue &getBasePtr() const { return getOperand(2); }
2418 const SDValue &getOffset() const { return getOperand(3); }
2419 const SDValue &getMask() const { return getOperand(4); }
2420
2421 static bool classof(const SDNode *N) {
2422 return N->getOpcode() == ISD::MSTORE;
2423 }
2424};
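The masked variants expose the same pieces plus the per-lane mask. A minimal sketch, assuming Op wraps an ISD::MLOAD node produced elsewhere:

    if (auto *MLD = dyn_cast<MaskedLoadSDNode>(Op.getNode())) {
      SDValue Mask     = MLD->getMask();     // vector of i1, one bit per lane
      SDValue PassThru = MLD->getPassThru(); // value used for disabled lanes
      bool Expanding   = MLD->isExpandingLoad();
      (void)Mask; (void)PassThru; (void)Expanding;
    }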
2425
2426/// This is a base class used to represent
2427/// MGATHER and MSCATTER nodes
2428///
2429class MaskedGatherScatterSDNode : public MemSDNode {
2430public:
2431 friend class SelectionDAG;
2432
2433 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2434 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2435 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2436 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2437 LSBaseSDNodeBits.AddressingMode = IndexType;
2438 assert(getIndexType() == IndexType && "Value truncated");
2439 }
2440
2441 /// How is Index applied to BasePtr when computing addresses.
2442 ISD::MemIndexType getIndexType() const {
2443 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2444 }
2445 void setIndexType(ISD::MemIndexType IndexType) {
2446 LSBaseSDNodeBits.AddressingMode = IndexType;
2447 }
2448 bool isIndexScaled() const {
2449 return (getIndexType() == ISD::SIGNED_SCALED) ||
2450 (getIndexType() == ISD::UNSIGNED_SCALED);
2451 }
2452 bool isIndexSigned() const {
2453 return (getIndexType() == ISD::SIGNED_SCALED) ||
2454 (getIndexType() == ISD::SIGNED_UNSCALED);
2455 }
2456
2457 // In both nodes the mask is Op2 and the base address is Op3:
2458 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2459 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2460 // Mask is a vector of i1 elements
2461 const SDValue &getBasePtr() const { return getOperand(3); }
2462 const SDValue &getIndex() const { return getOperand(4); }
2463 const SDValue &getMask() const { return getOperand(2); }
2464 const SDValue &getScale() const { return getOperand(5); }
2465
2466 static bool classof(const SDNode *N) {
2467 return N->getOpcode() == ISD::MGATHER ||
2468 N->getOpcode() == ISD::MSCATTER;
2469 }
2470};
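For gathers and scatters the address is formed per lane from BasePtr, Index and Scale. The sketch below (assuming Op wraps an ISD::MGATHER node) shows how the operands are usually pulled apart; the exact address arithmetic is target-dependent:

    if (auto *MGT = dyn_cast<MaskedGatherSDNode>(Op.getNode())) {
      SDValue Base  = MGT->getBasePtr(); // operand 3
      SDValue Index = MGT->getIndex();   // operand 4, one index per lane
      SDValue Scale = MGT->getScale();   // operand 5
      // Roughly: addr[i] = Base + extend(Index[i]) * Scale, where the extension
      // and scaling are governed by getIndexType().
      (void)Base; (void)Index; (void)Scale;
    }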
2471
2472/// This class is used to represent an MGATHER node
2473///
2474class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2475public:
2476 friend class SelectionDAG;
2477
2478 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2479 EVT MemVT, MachineMemOperand *MMO,
2480 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2481 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2482 IndexType) {
2483 LoadSDNodeBits.ExtTy = ETy;
2484 }
2485
2486 const SDValue &getPassThru() const { return getOperand(1); }
2487
2488 ISD::LoadExtType getExtensionType() const {
2489 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2490 }
2491
2492 static bool classof(const SDNode *N) {
2493 return N->getOpcode() == ISD::MGATHER;
2494 }
2495};
2496
2497/// This class is used to represent an MSCATTER node
2498///
2499class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2500public:
2501 friend class SelectionDAG;
2502
2503 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2504 EVT MemVT, MachineMemOperand *MMO,
2505 ISD::MemIndexType IndexType, bool IsTrunc)
2506 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2507 IndexType) {
2508 StoreSDNodeBits.IsTruncating = IsTrunc;
2509 }
2510
2511 /// Return true if the op does a truncation before store.
2512 /// For integers this is the same as doing a TRUNCATE and storing the result.
2513 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2514 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2515
2516 const SDValue &getValue() const { return getOperand(1); }
2517
2518 static bool classof(const SDNode *N) {
2519 return N->getOpcode() == ISD::MSCATTER;
2520 }
2521};
2522
2523/// An SDNode that represents everything that will be needed
2524/// to construct a MachineInstr. These nodes are created during the
2525 /// instruction selection phase proper.
2526///
2527/// Note that the only supported way to set the `memoperands` is by calling the
2528/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2529/// inside the DAG rather than in the node.
2530class MachineSDNode : public SDNode {
2531private:
2532 friend class SelectionDAG;
2533
2534 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2535 : SDNode(Opc, Order, DL, VTs) {}
2536
2537 // We use a pointer union between a single `MachineMemOperand` pointer and
2538 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2539 // the number of these is zero, the single pointer variant used when the
2540 // number is one, and the array is used for larger numbers.
2541 //
2542 // The array is allocated via the `SelectionDAG`'s allocator and so will
2543 // always live until the DAG is cleaned up and doesn't require ownership here.
2544 //
2545 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2546 // subclasses aren't managed in a conforming C++ manner. See the comments on
2547 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2548 // constraint here is that these don't manage memory with their constructor or
2549 // destructor and can be initialized to a good state even if they start off
2550 // uninitialized.
2551 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2552
2553 // Note that this could be folded into the above `MemRefs` member if doing so
2554 // is advantageous at some point. We don't need to store this in most cases.
2555 // However, at the moment this doesn't appear to make the allocation any
2556 // smaller and makes the code somewhat simpler to read.
2557 int NumMemRefs = 0;
2558
2559public:
2560 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2561
2562 ArrayRef<MachineMemOperand *> memoperands() const {
2563 // Special case the common cases.
2564 if (NumMemRefs == 0)
2565 return {};
2566 if (NumMemRefs == 1)
2567 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2568
2569 // Otherwise we have an actual array.
2570 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2571 }
2572 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2573 mmo_iterator memoperands_end() const { return memoperands().end(); }
2574 bool memoperands_empty() const { return memoperands().empty(); }
2575
2576 /// Clear out the memory reference descriptor list.
2577 void clearMemRefs() {
2578 MemRefs = nullptr;
2579 NumMemRefs = 0;
2580 }
2581
2582 static bool classof(const SDNode *N) {
2583 return N->isMachineOpcode();
2584 }
2585};
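A short sketch of the read side of this scheme (MN is an assumed MachineSDNode* produced by instruction selection); the write side must go through SelectionDAG::setNodeMemRefs as noted above:

    bool HasVolatileRef = false;
    for (MachineMemOperand *MMO : MN->memoperands())
      HasVolatileRef |= MMO->isVolatile(); // memoperands() hides the 0/1/many cases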
2586
2587/// An SDNode that records if a register contains a value that is guaranteed to
2588/// be aligned accordingly.
2589class AssertAlignSDNode : public SDNode {
2590 Align Alignment;
2591
2592public:
2593 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2594 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2595
2596 Align getAlign() const { return Alignment; }
2597
2598 static bool classof(const SDNode *N) {
2599 return N->getOpcode() == ISD::AssertAlign;
2600 }
2601};
2602
2603class SDNodeIterator {
2604 const SDNode *Node;
2605 unsigned Operand;
2606
2607 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2608
2609public:
2610 using iterator_category = std::forward_iterator_tag;
2611 using value_type = SDNode;
2612 using difference_type = std::ptrdiff_t;
2613 using pointer = value_type *;
2614 using reference = value_type &;
2615
2616 bool operator==(const SDNodeIterator& x) const {
2617 return Operand == x.Operand;
2618 }
2619 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2620
2621 pointer operator*() const {
2622 return Node->getOperand(Operand).getNode();
2623 }
2624 pointer operator->() const { return operator*(); }
2625
2626 SDNodeIterator& operator++() { // Preincrement
2627 ++Operand;
2628 return *this;
2629 }
2630 SDNodeIterator operator++(int) { // Postincrement
2631 SDNodeIterator tmp = *this; ++*this; return tmp;
2632 }
2633 size_t operator-(SDNodeIterator Other) const {
2634 assert(Node == Other.Node &&
2635 "Cannot compare iterators of two different nodes!");
2636 return Operand - Other.Operand;
2637 }
2638
2639 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2640 static SDNodeIterator end (const SDNode *N) {
2641 return SDNodeIterator(N, N->getNumOperands());
2642 }
2643
2644 unsigned getOperand() const { return Operand; }
2645 const SDNode *getNode() const { return Node; }
2646};
2647
2648template <> struct GraphTraits<SDNode*> {
2649 using NodeRef = SDNode *;
2650 using ChildIteratorType = SDNodeIterator;
2651
2652 static NodeRef getEntryNode(SDNode *N) { return N; }
2653
2654 static ChildIteratorType child_begin(NodeRef N) {
2655 return SDNodeIterator::begin(N);
2656 }
2657
2658 static ChildIteratorType child_end(NodeRef N) {
2659 return SDNodeIterator::end(N);
2660 }
2661};
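This specialization is what lets the generic graph utilities walk a DAG by following operand edges. A small sketch, assuming an SDNode *Root and the usual ADT header:

    #include "llvm/ADT/DepthFirstIterator.h"

    unsigned Reachable = 0;
    for (SDNode *N : depth_first(Root)) { // iterates via SDNodeIterator
      (void)N;
      ++Reachable; // every node reachable from Root through operand edges
    }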
2662
2663/// A representation of the largest SDNode, for use in sizeof().
2664///
2665/// This needs to be a union because the largest node differs on 32 bit systems
2666/// with 4 and 8 byte pointer alignment, respectively.
2667using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2668 BlockAddressSDNode,
2669 GlobalAddressSDNode,
2670 PseudoProbeSDNode>;
2671
2672/// The SDNode class with the greatest alignment requirement.
2673using MostAlignedSDNode = GlobalAddressSDNode;
2674
2675namespace ISD {
2676
2677 /// Returns true if the specified node is a non-extending and unindexed load.
2678 inline bool isNormalLoad(const SDNode *N) {
2679 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2680 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2681 Ld->getAddressingMode() == ISD::UNINDEXED;
2682 }
2683
2684 /// Returns true if the specified node is a non-extending load.
2685 inline bool isNON_EXTLoad(const SDNode *N) {
2686 return isa<LoadSDNode>(N) &&
2687 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2688 }
2689
2690 /// Returns true if the specified node is an EXTLOAD.
2691 inline bool isEXTLoad(const SDNode *N) {
2692 return isa<LoadSDNode>(N) &&
2693 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2694 }
2695
2696 /// Returns true if the specified node is a SEXTLOAD.
2697 inline bool isSEXTLoad(const SDNode *N) {
2698 return isa<LoadSDNode>(N) &&
2699 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2700 }
2701
2702 /// Returns true if the specified node is a ZEXTLOAD.
2703 inline bool isZEXTLoad(const SDNode *N) {
2704 return isa<LoadSDNode>(N) &&
2705 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2706 }
2707
2708 /// Returns true if the specified node is an unindexed load.
2709 inline bool isUNINDEXEDLoad(const SDNode *N) {
2710 return isa<LoadSDNode>(N) &&
2711 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2712 }
2713
2714 /// Returns true if the specified node is a non-truncating
2715 /// and unindexed store.
2716 inline bool isNormalStore(const SDNode *N) {
2717 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2718 return St && !St->isTruncatingStore() &&
2719 St->getAddressingMode() == ISD::UNINDEXED;
2720 }
2721
2722 /// Returns true if the specified node is an unindexed store.
2723 inline bool isUNINDEXEDStore(const SDNode *N) {
2724 return isa<StoreSDNode>(N) &&
2725 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2726 }
2727
2728 /// Attempt to match a unary predicate against a scalar/splat constant or
2729 /// every element of a constant BUILD_VECTOR.
2730 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2731 bool matchUnaryPredicate(SDValue Op,
2732 std::function<bool(ConstantSDNode *)> Match,
2733 bool AllowUndefs = false);
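A sketch of a typical call (Op is an assumed SDValue); because the predicate is applied element by element, the same check covers scalar constants, splats and general constant BUILD_VECTORs:

    bool AllNonZero = ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
      return C && !C->isNullValue(); // C is null only for UNDEF elements when AllowUndefs is set
    });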
2734
2735 /// Attempt to match a binary predicate against a pair of scalar/splat
2736 /// constants or every element of a pair of constant BUILD_VECTORs.
2737 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2738 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2739 bool matchBinaryPredicate(
2740 SDValue LHS, SDValue RHS,
2741 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2742 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2743
2744 /// Returns true if the specified value is the overflow result from one
2745 /// of the overflow intrinsic nodes.
2746 inline bool isOverflowIntrOpRes(SDValue Op) {
2747 unsigned Opc = Op.getOpcode();
2748 return (Op.getResNo() == 1 &&
2749 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2750 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2751 }
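For context, the overflow flag is always result number 1 of these nodes. A hedged sketch, assuming DAG, DL, VT, LHS and RHS exist in the caller (and using MVT::i1 for the flag, where real code would ask the target for its boolean type):

    SDVTList VTs = DAG.getVTList(VT, MVT::i1);
    SDValue Add = DAG.getNode(ISD::UADDO, DL, VTs, LHS, RHS);
    SDValue Ovf = Add.getValue(1); // the overflow result
    assert(ISD::isOverflowIntrOpRes(Ovf));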
2752
2753} // end namespace ISD
2754
2755} // end namespace llvm
2756
2757#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H