#define DEBUG_TYPE "x86-isel"
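// Command-line knobs for the X86 ISel heuristics declared below: preferred
// innermost-loop alignment, branch-merging cost threshold and biases, shift
// widening, and multiply-by-constant expansion.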
71 "x86-experimental-pref-innermost-loop-alignment",
cl::init(4),
73 "Sets the preferable loop alignment for experiments (as log2 bytes) "
74 "for innermost loops only. If specified, this option overrides "
75 "alignment set by x86-experimental-pref-loop-alignment."),
79 "x86-br-merging-base-cost",
cl::init(2),
81 "Sets the cost threshold for when multiple conditionals will be merged "
82 "into one branch versus be split in multiple branches. Merging "
83 "conditionals saves branches at the cost of additional instructions. "
84 "This value sets the instruction cost limit, below which conditionals "
85 "will be merged, and above which conditionals will be split. Set to -1 "
86 "to never merge branches."),
90 "x86-br-merging-ccmp-bias",
cl::init(6),
91 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that the target "
92 "supports conditional compare instructions."),
97 cl::desc(
"Replace narrow shifts with wider shifts."),
101 "x86-br-merging-likely-bias",
cl::init(0),
102 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that it is likely "
103 "that all conditionals will be executed. For example for merging "
104 "the conditionals (a == b && c > d), if its known that a == b is "
105 "likely, then it is likely that if the conditionals are split "
106 "both sides will be executed, so it may be desirable to increase "
107 "the instruction cost threshold. Set to -1 to never merge likely "
112 "x86-br-merging-unlikely-bias",
cl::init(-1),
114 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
115 "that all conditionals will be executed. For example for merging "
116 "the conditionals (a == b && c > d), if its known that a == b is "
117 "unlikely, then it is unlikely that if the conditionals are split "
118 "both sides will be executed, so it may be desirable to decrease "
119 "the instruction cost threshold. Set to -1 to never merge unlikely "
124 "mul-constant-optimization",
cl::init(
true),
125 cl::desc(
"Replace 'mul x, Const' with more effective instructions like "
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();

  if (Subtarget.isAtom())
  else if (Subtarget.is64Bit())
  if (Subtarget.hasSlowDivide32())
  if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())

  static const struct {
    const char *const Name;
  for (const auto &LC : LibraryCalls) {
  if (Subtarget.is64Bit())
  for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (Subtarget.is64Bit())
  if (!Subtarget.useSoftFloat()) {
  if (!Subtarget.is64Bit()) {
  for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
  if (Subtarget.is64Bit()) {
  if (Subtarget.hasAVX10_2()) {
    for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
    if (Subtarget.hasAVX10_2_512()) {
  if (Subtarget.is64Bit()) {
  if (Subtarget.is64Bit()) {
  } else if (!Subtarget.is64Bit())
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                   MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (Subtarget.is64Bit())
  if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
  if (!Subtarget.hasBMI()) {
    if (Subtarget.is64Bit()) {
  if (Subtarget.hasLZCNT()) {
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
                    (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
  for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
  for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
  if (Subtarget.is64Bit())
  if (Subtarget.hasPOPCNT()) {
  if (!Subtarget.hasMOVBE())
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (!Subtarget.is64Bit())
  if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
  bool Is64Bit = Subtarget.is64Bit();

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                       : &X86::FR16RegClass);
                       : &X86::FR32RegClass);
                       : &X86::FR64RegClass);
    for (auto VT : { MVT::f32, MVT::f64 }) {
    setF16Action(MVT::f16, Promote);
  } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
             (UseX87 || Is64Bit)) {
    for (auto VT : { MVT::f32, MVT::f64 }) {
  if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
    addLegalFPImmediate(APFloat(+0.0f));
    addLegalFPImmediate(APFloat(+1.0f));
    addLegalFPImmediate(APFloat(-0.0f));
    addLegalFPImmediate(APFloat(-1.0f));
    addLegalFPImmediate(APFloat(+0.0f));
    addLegalFPImmediate(APFloat(+0.0));
    addLegalFPImmediate(APFloat(+1.0));
    addLegalFPImmediate(APFloat(-0.0));
    addLegalFPImmediate(APFloat(-1.0));
    addLegalFPImmediate(APFloat(+0.0));
    addLegalFPImmediate(TmpFlt);
    addLegalFPImmediate(TmpFlt);
    addLegalFPImmediate(TmpFlt2);
    addLegalFPImmediate(TmpFlt2);
  if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
                       : &X86::VR128RegClass);
    for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
                     MVT::v4f32, MVT::v8f32, MVT::v16f32,
                     MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
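// Vector ISA setup: register classes and per-type operation actions for
// MMX/SSE1/SSE2/SSSE3/SSE4.1/SSE4.2/XOP/AVX, with AVX-512 handled further
// below.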
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
                       : &X86::VR128RegClass);
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
                       : &X86::VR128RegClass);
    for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
    for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                     MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
      if (VT == MVT::v2i64 && !Subtarget.is64Bit())
    setF16Action(MVT::v8f16, Expand);
    for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      if (VT == MVT::v2i64)
        continue;
  if (Subtarget.hasGFNI()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
                       : &X86::VR256RegClass);
    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      if (VT == MVT::v4i64)
        continue;
    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                     MVT::v2f64, MVT::v4f64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
    for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                    MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
    setF16Action(MVT::v16f16, Expand);
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
  if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
      Subtarget.hasF16C()) {
    for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
    for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    if (!Subtarget.hasDQI()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
    if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
      for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
    bool HasBWI = Subtarget.hasBWI();
    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
    if (Subtarget.hasDQI())
    for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
    for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
    if (!Subtarget.hasVLX()) {
      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
    for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
    for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
    if (Subtarget.hasDQI()) {
    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v16i32, MVT::v8i64 })
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                     MVT::v16f16, MVT::v8f32, MVT::v4f64 })
    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
                     MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
    setF16Action(MVT::v32f16, Expand);
    for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
    if (Subtarget.hasVBMI2()) {
      for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
    for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
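// AVX-512VL refinements for 128-/256-bit vectors, plus feature-specific
// actions guarded by DQI/CDI/VPOPCNTDQ/VBMI2/BWI/BITALG/FP16/BF16/AMX checks.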
    if (Subtarget.hasDQI()) {
             "Unexpected operation action!");
    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
    if (Subtarget.hasDQI()) {
    if (Subtarget.hasCDI()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
    if (Subtarget.hasVPOPCNTDQ()) {
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
    for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
                   MVT::v4f64, MVT::v2i64, MVT::v2f64, MVT::v16i8, MVT::v8i16,
                   MVT::v16i16, MVT::v8i8})
    for (MVT VT : {MVT::v16i32, MVT::v16f32, MVT::v8i64, MVT::v8f64})
    if (Subtarget.hasVLX())
      for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
                     MVT::v4f64, MVT::v2i64, MVT::v2f64})
    if (Subtarget.hasVBMI2())
      for (MVT VT : {MVT::v32i16, MVT::v64i8})
    if (Subtarget.hasVBMI2() && Subtarget.hasVLX())
      for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v32i8, MVT::v16i16})
  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
    for (auto VT : { MVT::v16i1, MVT::v32i1 })
    for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
    if (Subtarget.hasBITALG()) {
      for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
  if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
    auto setGroup = [&](MVT VT) {
    setGroup(MVT::v32f16);
    if (Subtarget.hasVLX()) {
      setGroup(MVT::v8f16);
      setGroup(MVT::v16f16);
  if (!Subtarget.useSoftFloat() &&
      (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
                       : &X86::VR128RegClass);
                       : &X86::VR256RegClass);
    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
      setF16Action(VT, Expand);
      if (!Subtarget.hasBF16())
  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
    setF16Action(MVT::v32bf16, Expand);
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
    if (Subtarget.hasAVX10_2_512()) {
    for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
    if (Subtarget.hasBWI()) {
    if (Subtarget.hasFP16()) {
  if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
  if (!Subtarget.is64Bit()) {
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
  if (Subtarget.is32Bit() &&
  unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;

  if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
      !Subtarget.hasBWI())

    bool AssumeSingleUse) {
  if (!AssumeSingleUse && !Op.hasOneUse())
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
      Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))

    bool AssumeSingleUse) {
  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  return !Ld->isVolatile() ||

  if (!Op.hasOneUse())
  if (Op.hasOneUse()) {
    unsigned Opcode = Op.getNode()->user_begin()->getOpcode();
  default:
    return false;
  default:
    return false;

  int ReturnAddrIndex = FuncInfo->getRAIndex();
  if (ReturnAddrIndex == 0) {

    bool HasSymbolicDisplacement) {
  if (!HasSymbolicDisplacement)
  return Offset < 16 * 1024 * 1024;

  switch (SetCCOpcode) {
  if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
  if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
  if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
  if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
  SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
  switch (SetCCOpcode) {
  switch (SetCCOpcode) {

    unsigned Intrinsic) const {
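// getTgtMemIntrinsic: describe the memory access performed by X86 intrinsics
// (AES key-locker, CMPccXADD, and the atomic bit-test/arithmetic families) so
// the DAG builder can attach the correct memory operand.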
  switch (Intrinsic) {
  case Intrinsic::x86_aesenc128kl:
  case Intrinsic::x86_aesdec128kl:
    Info.ptrVal = I.getArgOperand(1);
  case Intrinsic::x86_aesenc256kl:
  case Intrinsic::x86_aesdec256kl:
    Info.ptrVal = I.getArgOperand(1);
  case Intrinsic::x86_aesencwide128kl:
  case Intrinsic::x86_aesdecwide128kl:
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::x86_aesencwide256kl:
  case Intrinsic::x86_aesdecwide256kl:
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::x86_cmpccxadd32:
  case Intrinsic::x86_cmpccxadd64:
  case Intrinsic::x86_atomic_bts:
  case Intrinsic::x86_atomic_btc:
  case Intrinsic::x86_atomic_btr: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getType()->getScalarSizeInBits();
  case Intrinsic::x86_atomic_bts_rm:
  case Intrinsic::x86_atomic_btc_rm:
  case Intrinsic::x86_atomic_btr_rm: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
  case Intrinsic::x86_aadd32:
  case Intrinsic::x86_aadd64:
  case Intrinsic::x86_aand32:
  case Intrinsic::x86_aand64:
  case Intrinsic::x86_aor32:
  case Intrinsic::x86_aor64:
  case Intrinsic::x86_axor32:
  case Intrinsic::x86_axor64:
  case Intrinsic::x86_atomic_add_cc:
  case Intrinsic::x86_atomic_sub_cc:
  case Intrinsic::x86_atomic_or_cc:
  case Intrinsic::x86_atomic_and_cc:
  case Intrinsic::x86_atomic_xor_cc: {
    Info.ptrVal = I.getArgOperand(0);
    unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();

  switch (IntrData->Type) {
    Info.ptrVal = I.getArgOperand(0);
      ScalarVT = MVT::i16;
      ScalarVT = MVT::i32;
    Info.ptrVal = nullptr;
    Info.ptrVal = nullptr;

    bool ForCodeSize) const {
  for (const APFloat &FPImm : LegalFPImmediates)
    if (Imm.bitwiseIsEqual(FPImm))

  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
  EVT VT = Load->getValueType(0);
  if (Use.getResNo() != 0)

  if (BitSize == 0 || BitSize > 64)
      (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();

    unsigned Index) const {
  return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
  return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
  return !Subtarget.hasSSE2() || VT == MVT::f80;
  return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
         (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
  if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
  unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
  return Subtarget.hasFastLZCNT();

  EVT VT = Y.getValueType();
  if (!Subtarget.hasBMI())
  if (VT != MVT::i32 && VT != MVT::i64)
  return !isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque();

  EVT VT = Y.getValueType();
  if (VT == MVT::v4i32)
  return X.getValueType().isScalarInteger();

    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
  if (X.getValueType().isScalarInteger())

    EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
    const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
  bool PreferRotate = false;
    PreferRotate = Subtarget.hasBMI2();
    if (!PreferRotate) {
      PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
    assert(AndMask.has_value() && "Null andmask when querying about shift+and");
    if (PreferRotate && MayTransformRotate)
  if (PreferRotate || !MayTransformRotate || VT.isVector())

    const Value *Rhs) const {
  if (BaseCost >= 0 && Subtarget.hasCCMP())
  if (BaseCost >= 0 && Opc == Instruction::And &&

           N->getOperand(0).getOpcode() == ISD::SRL) ||
           N->getOperand(0).getOpcode() == ISD::SHL)) &&
         "Expected shift-shift mask");
  EVT VT = N->getValueType(0);
  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
    return N->getOperand(1) == N->getOperand(0).getOperand(1);

  EVT VT = Y.getValueType();
  if (VT == MVT::i64 && !Subtarget.is64Bit())
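// Shuffle-mask predicates and mask-manipulation helpers: undef/zero sentinel
// checks, in-range tests, and widening/scaling of shuffle masks.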
                  [CmpVal](int M) { return isUndefOrEqual(M, CmpVal); });
                [](int M) { return M == SM_SentinelUndef; });
  unsigned NumElts = Mask.size();
  unsigned NumElts = Mask.size();
  return (Val >= Low && Val < Hi);
  unsigned NumElts = Mask.size();

    unsigned Size, int Low, int Step = 1) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
  unsigned NumElts = Mask.size();

  WidenedMask.assign(Mask.size() / 2, 0);
  for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
    int M1 = Mask[i + 1];
      WidenedMask[i / 2] = M1 / 2;
      WidenedMask[i / 2] = M0 / 2;
      WidenedMask[i / 2] = M0 / 2;
  assert(WidenedMask.size() == Mask.size() / 2 &&
         "Incorrect size of mask after widening the elements!");

                             const APInt &Zeroable,
  assert(!Zeroable.isZero() && "V2's non-undef elements are used?!");
  for (int i = 0, Size = Mask.size(); i != Size; ++i)

  unsigned NumSrcElts = Mask.size();
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Illegal shuffle scale factor");
  if (NumDstElts >= NumSrcElts) {
    int Scale = NumDstElts / NumSrcElts;
  while (ScaledMask.size() > NumDstElts) {
    ScaledMask = std::move(WidenedMask);

                              const SDLoc &dl, bool IsMask = false) {
  MVT ConstVecVT = VT;
  for (unsigned i = 0; i < NumElts; ++i) {
    bool IsUndef = Values[i] < 0 && IsMask;
         "Unequal constant and undef arrays");
  MVT ConstVecVT = VT;
  for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
    const APInt &V = Bits[i];
    } else if (EltVT == MVT::f32) {
    } else if (EltVT == MVT::f64) {
         "Unexpected vector type");
         "Unexpected vector type");

      LHS.getValueType() != RHS.getValueType() ||
      LHS.getOperand(0) != RHS.getOperand(0))
  if (Src.getValueSizeInBits() != (LHS.getValueSizeInBits() * 2))
  unsigned NumElts = LHS.getValueType().getVectorNumElements();
  if ((LHS.getConstantOperandAPInt(1) == 0 &&
       RHS.getConstantOperandAPInt(1) == NumElts) ||
      (AllowCommute && RHS.getConstantOperandAPInt(1) == 0 &&
       LHS.getConstantOperandAPInt(1) == NumElts))

                                const SDLoc &dl, unsigned vectorWidth) {
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
  IdxVal &= ~(ElemsPerChunk - 1);
                     Vec->ops().slice(IdxVal, ElemsPerChunk));
                               unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  EVT ResultVT = Result.getValueType();
  IdxVal &= ~(ElemsPerChunk - 1);
         "Unsupported vector widening type");
                            const SDLoc &dl, unsigned WideSizeInBits) {
         "Unsupported vector widening type");
  return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
  if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
    return Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
  return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);

  assert(Ops.empty() && "Expected an empty ops vector");
    Ops.append(N->op_begin(), N->op_end());
    const APInt &Idx = N->getConstantOperandAPInt(2);
    EVT VT = Src.getValueType();
    if (Idx == 0 && Src.isUndef()) {
        Src.getOperand(1).getValueType() == SubVT &&
    if (Src.isUndef()) {

  unsigned NumSubOps = SubOps.size();
  unsigned HalfNumSubOps = NumSubOps / 2;
  assert((NumSubOps % 2) == 0 && "Unexpected number of subvectors");
  EVT HalfVT = V.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
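// Subvector extraction/insertion and vector splitting utilities used
// throughout the custom lowering code below.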
  EVT VT = Op.getValueType();
  assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
         "Can't split odd sized vector");
    return std::make_pair(Lo, Lo);
  return std::make_pair(Lo, Hi);

  EVT VT = Op.getValueType();
  for (unsigned I = 0; I != NumOps; ++I) {
    if (!SrcOp.getValueType().isVector()) {
                     DAG.getNode(Op.getOpcode(), dl, LoVT, LoOps),
                     DAG.getNode(Op.getOpcode(), dl, HiVT, HiOps));

  [[maybe_unused]] EVT VT = Op.getValueType();
  assert((Op.getOperand(0).getValueType().is256BitVector() ||
          Op.getOperand(0).getValueType().is512BitVector()) &&
  assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
  [[maybe_unused]] EVT VT = Op.getValueType();
  assert(Op.getOperand(0).getValueType() == VT &&
         Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");

template <typename F>
                             F Builder, bool CheckBWI = true) {
  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
  unsigned NumSubs = 1;
  } else if (Subtarget.hasAVX2()) {
    return Builder(DAG, DL, Ops);
  for (unsigned i = 0; i != NumSubs; ++i) {

  EVT OpVT = Op.getValueType();
  if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                          HasAnyUndefs, OpEltSizeInBits) &&
      !HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits)

  MVT OpVT = Op.getSimpleValueType();
  assert(OpVT == VT && "Vector type mismatch");
  if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {

  unsigned IdxVal = Op.getConstantOperandVal(2);
  if (IdxVal == 0 && Vec.isUndef())
  MVT OpVT = Op.getSimpleValueType();
  assert(IdxVal + SubVecNumElems <= NumElems &&
         "Unexpected index value in INSERT_SUBVECTOR");
                       Undef, SubVec, ZeroIdx);
    assert(IdxVal != 0 && "Unexpected index");
    assert(IdxVal != 0 && "Unexpected index");
               [](SDValue V) { return V.isUndef(); })) {
    unsigned ShiftLeft = NumElems - SubVecNumElems;
    unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
    if (ShiftRight != 0)
  if (IdxVal + SubVecNumElems == NumElems) {
    if (SubVecNumElems * 2 == NumElems) {
                        Undef, Vec, ZeroIdx);
  unsigned ShiftLeft = NumElems - SubVecNumElems;
  unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
  if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
    unsigned LowShift = NumElems - IdxVal;
    unsigned HighShift = IdxVal + SubVecNumElems;

         "Expected a 128/256/512-bit vector type");
  EVT InVT = In.getValueType();
         "Unknown extension opcode");
         "Expected VTs to be the same size!");
    InVT = In.getValueType();

                              bool Lo, bool Unary) {
         "Illegal vector type to unpack");
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  for (int i = 0; i < NumElts; ++i) {
    unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
    Pos += (Unary ? 0 : NumElts * (i % 2));
    Pos += (Lo ? 0 : NumEltsInLane / 2);
    Mask.push_back(Pos);
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  for (int i = 0; i < NumElts; ++i) {
    Pos += (Lo ? 0 : NumElts / 2);
    Mask.push_back(Pos);
  for (int I = 0, NumElts = Mask.size(); I != NumElts; ++I) {
    SDValue V = (M < NumElts) ? V1 : V2;
    Ops[I] = V.getOperand(M % NumElts);

                      bool PackHiHalf = false) {
  MVT OpVT = LHS.getSimpleValueType();
  bool UsePackUS = Subtarget.hasSSE41() || EltSizeInBits == 8;
  assert(OpVT == RHS.getSimpleValueType() &&
         "Unexpected PACK operand types");
  assert((EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) &&
         "Unexpected PACK result type");
  if (EltSizeInBits == 32) {
    int Offset = PackHiHalf ? 1 : 0;
    for (int I = 0; I != NumElts; I += 4) {

  MVT VT = V2.getSimpleValueType();
  for (int i = 0; i != NumElems; ++i)
    MaskVec[i] = (i == Idx) ? NumElems : i;
  return dyn_cast<ConstantPoolSDNode>(Ptr);
  assert(LD && "Unexpected null LoadSDNode");
                                        bool AllowWholeUndefs = true,
                                        bool AllowPartialUndefs = false) {
  assert(EltBits.empty() && "Expected an empty EltBits vector");
  EVT VT = Op.getValueType();
  assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
  unsigned NumElts = SizeInBits / EltSizeInBits;
    unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
    assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
           "Constant bit sizes don't match");
    bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
    if (NumSrcElts == NumElts) {
      UndefElts = UndefSrcElts;
      EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
    APInt UndefBits(SizeInBits, 0);
    APInt MaskBits(SizeInBits, 0);
    for (unsigned i = 0; i != NumSrcElts; ++i) {
      unsigned BitOffset = i * SrcEltSizeInBits;
      if (UndefSrcElts[i])
        UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
      MaskBits.insertBits(SrcEltBits[i], BitOffset);
    UndefElts = APInt(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned BitOffset = i * EltSizeInBits;
        if (!AllowWholeUndefs)
      if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
      EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset);

                                  unsigned UndefBitIndex) {
    if (isa<UndefValue>(Cst)) {
      Undefs.setBit(UndefBitIndex);
    if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
      Mask = CInt->getValue();
    if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
      Mask = CFP->getValueAPF().bitcastToAPInt();
    if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
      Type *Ty = CDS->getType();
      Type *EltTy = CDS->getElementType();
      if (!IsInteger && !IsFP)
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
          Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
          Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
    return CastBitData(UndefSrcElts, SrcEltBits);

  if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return CastBitData(UndefSrcElts, SrcEltBits);
  if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
    APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
    return CastBitData(UndefSrcElts, SrcEltBits);
  if (auto *BV = dyn_cast<BuildVectorSDNode>(Op)) {
    if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
      for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
      return CastBitData(UndefSrcElts, SrcEltBits);
    if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
    if ((SizeInBits % SrcEltSizeInBits) != 0)
    APInt UndefSrcElts(NumSrcElts, 0);
    for (unsigned i = 0; i != NumSrcElts; ++i)
    return CastBitData(UndefSrcElts, SrcEltBits);
    auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
    APInt UndefSrcElts(NumSrcElts, 0);
    if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
      if (UndefSrcElts[0])
        UndefSrcElts.setBits(0, NumSrcElts);
      if (SrcEltBits[0].getBitWidth() != SrcEltSizeInBits)
        SrcEltBits[0] = SrcEltBits[0].trunc(SrcEltSizeInBits);
      SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
      return CastBitData(UndefSrcElts, SrcEltBits);
    auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
    unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
    if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
        (SizeInBits % SubVecSizeInBits) != 0)
    unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
    unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
    APInt UndefSubElts(NumSubElts, 0);
                                      APInt(CstEltSizeInBits, 0));
    for (unsigned i = 0; i != NumSubElts; ++i) {
      for (unsigned j = 1; j != NumSubVecs; ++j)
        SubEltBits[i + (j * NumSubElts)] = SubEltBits[i];
    return CastBitData(UndefSubElts, SubEltBits);
      isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
    APInt UndefSrcElts(NumSrcElts, 0);
    const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0);
    SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits));
    SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
    return CastBitData(UndefSrcElts, SrcEltBits);
    bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits;
    APInt UndefSrcElts, UndefSubElts;
                                       UndefSubElts, EltSubBits,
                                       AllowWholeUndefs && AllowUndefs,
                                       AllowPartialUndefs && AllowUndefs) &&
                                       UndefSrcElts, EltSrcBits,
                                       AllowWholeUndefs && AllowUndefs,
                                       AllowPartialUndefs && AllowUndefs)) {
      unsigned BaseIdx = Op.getConstantOperandVal(2);
      UndefSrcElts.insertBits(UndefSubElts, BaseIdx);
      for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
        EltSrcBits[BaseIdx + i] = EltSubBits[i];
      return CastBitData(UndefSrcElts, EltSrcBits);
                                    UndefElts, EltBits, AllowWholeUndefs,
                                    AllowPartialUndefs)) {
      EVT SrcVT = Op.getOperand(0).getValueType();
      unsigned BaseIdx = Op.getConstantOperandVal(1);
      UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
      if ((BaseIdx + NumSubElts) != NumSrcElts)
        EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
    if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
    APInt UndefElts0, UndefElts1;
                                    UndefElts0, EltBits0, AllowWholeUndefs,
                                    AllowPartialUndefs))
                                    UndefElts1, EltBits1, AllowWholeUndefs,
                                    AllowPartialUndefs))
    for (int i = 0; i != (int)NumElts; ++i) {
      } else if (M < (int)NumElts) {
        if (UndefElts1[M - NumElts])
        EltBits.push_back(EltBits1[M - NumElts]);

          Op, Op.getScalarValueSizeInBits(), UndefElts, EltBits,
          true, AllowPartialUndefs)) {
    int SplatIndex = -1;
    for (int i = 0, e = EltBits.size(); i != e; ++i) {
      if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
    if (0 <= SplatIndex) {
      SplatVal = EltBits[SplatIndex];

                                        unsigned MaskEltSizeInBits,
  for (const APInt &Elt : EltBits)
  bool IsPow2OrUndef = true;
  for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
    IsPow2OrUndef &= UndefElts[I] || EltBits[I].isPowerOf2();
  return IsPow2OrUndef;

  EVT VT = V.getValueType();
    return V.getOperand(0);
      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
    Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
      V.getOperand(0).hasOneUse()) {
                                      V.getScalarValueSizeInBits(), UndefElts,
    bool MinSigned = false;
    for (APInt &Elt : EltBits) {
      MinSigned |= Elt.isMinSignedValue();
    MVT VT = V.getSimpleValueType();
    for (SDValue &CatOp : CatOps) {
      CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
      V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {

                                 bool Unary, unsigned NumStages = 1) {
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  unsigned Offset = Unary ? 0 : NumElts;
  unsigned Repetitions = 1u << (NumStages - 1);
  unsigned Increment = 1u << NumStages;
  assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction");
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Stage = 0; Stage != Repetitions; ++Stage) {
      for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
        Mask.push_back(Elt + (Lane * NumEltsPerLane));
      for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
        Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);

  int NumInnerElts = NumElts / 2;
  int NumEltsPerLane = NumElts / NumLanes;
  int NumInnerEltsPerLane = NumInnerElts / NumLanes;
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
      int OuterIdx = (Lane * NumEltsPerLane) + Elt;
      int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
      if (DemandedElts[OuterIdx])
        DemandedLHS.setBit(InnerIdx);
      if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
        DemandedRHS.setBit(InnerIdx);
                        DemandedLHS, DemandedRHS);
  DemandedLHS |= DemandedLHS << 1;
  DemandedRHS |= DemandedRHS << 1;
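// getTargetShuffleMask: decode a target-specific shuffle node into its mask
// and operands, asserting that the operand types match the result type.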
  MVT VT = N.getSimpleValueType();
  assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
  assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
  bool IsFakeUnary = false;
  switch (N.getOpcode()) {
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    if (isa<ConstantSDNode>(N.getOperand(1)) &&
        isa<ConstantSDNode>(N.getOperand(2))) {
      int BitLen = N.getConstantOperandVal(1);
      int BitIdx = N.getConstantOperandVal(2);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    if (isa<ConstantSDNode>(N.getOperand(2)) &&
        isa<ConstantSDNode>(N.getOperand(3))) {
      int BitLen = N.getConstantOperandVal(2);
      int BitIdx = N.getConstantOperandVal(3);
      IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
           "Only 32-bit and 64-bit elements are supported!");
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    if (N.getOperand(0).getValueType() == VT) {
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    SDValue MaskNode = N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    SDValue MaskNode = N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    SDValue MaskNode = N.getOperand(2);
    SDValue CtrlNode = N.getOperand(3);
    if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
      unsigned CtrlImm = CtrlOp->getZExtValue();
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1);
    SDValue MaskNode = N.getOperand(2);
    assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");
    SDValue MaskNode = N.getOperand(0);
    assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N.getOperand(2).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(2);
    SDValue MaskNode = N.getOperand(1);

  if (!AllowSentinelZero && isAnyZero(Mask))
    if (M >= (int)Mask.size())
  if (!IsUnary || IsFakeUnary)

  int Size = Mask.size();
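// Zeroable-element analysis: determine which shuffle result elements are
// known to be zero or undef from the constant bits of the shuffle inputs.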
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  for (int i = 0; i < Size; ++i) {
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      if ((Size % V.getNumOperands()) == 0) {
        int Scale = Size / V->getNumOperands();
          APInt Val = Cst->getAPIntValue();
          Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
          APInt Val = Cst->getValueAPF().bitcastToAPInt();
          Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
      if ((V.getNumOperands() % Size) == 0) {
        int Scale = V->getNumOperands() / Size;
        bool AllUndef = true;
        bool AllZero = true;
        for (int j = 0; j < Scale; ++j) {
          SDValue Op = V.getOperand((M * Scale) + j);
          AllUndef &= Op.isUndef();

  MVT VT = N.getSimpleValueType();
  int Size = Mask.size();
  SDValue V2 = IsUnary ? V1 : Ops[1];
         "Illegal split of shuffle value type");
  APInt UndefSrcElts[2];
  bool IsSrcConstant[2] = {
                                      SrcEltBits[0], true,
                                      SrcEltBits[1], true,
  for (int i = 0; i < Size; ++i) {
    unsigned SrcIdx = M / Size;
        (Size % V.getValueType().getVectorNumElements()) == 0) {
      int Scale = Size / V.getValueType().getVectorNumElements();
      int Idx = M / Scale;
      SDValue Vec = V.getOperand(0);
      int Idx = V.getConstantOperandVal(2);
      int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
      if (M < Idx || (Idx + NumSubElts) <= M)
    if (IsSrcConstant[SrcIdx]) {
      if (UndefSrcElts[SrcIdx][M])
      else if (SrcEltBits[SrcIdx][M] == 0)
         "Different mask size from vector size!");

                                      const APInt &KnownUndef,
                                      const APInt &KnownZero,
                                      bool ResolveKnownZeros = true) {
  unsigned NumElts = Mask.size();
         KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
  for (unsigned i = 0; i != NumElts; ++i) {
    else if (ResolveKnownZeros && KnownZero[i])
  unsigned NumElts = Mask.size();
  for (unsigned i = 0; i != NumElts; ++i) {

  EVT CondVT = Cond.getValueType();
  for (int i = 0; i != (int)NumElts; ++i) {
    if (UndefElts[i] || (!IsBLENDV && EltBits[i].isZero()) ||
        (IsBLENDV && EltBits[i].isNonNegative()))
                                   bool ResolveKnownElts);
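// getFauxShuffleMask: model non-shuffle nodes (AND/ANDNP masks, subvector
// inserts, truncations, shifts, rotates and extensions) as shuffle masks.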
                               bool ResolveKnownElts) {
  MVT VT = N.getSimpleValueType();
  if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
  unsigned NumSizeInBytes = NumSizeInBits / 8;
  unsigned NumBytesPerElt = NumBitsPerElt / 8;
  unsigned Opcode = N.getOpcode();
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();
    Mask.append(ShuffleMask.begin(), ShuffleMask.end());
    uint64_t ZeroMask = IsAndN ? 255 : 0;
    assert(UndefElts.isZero() && "Unexpected UNDEF element in AND/ANDNP mask");
    for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
      const APInt &ByteBits = EltBits[i];
      if (ByteBits != 0 && ByteBits != 255)
    size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
    for (int i = 0; i != (int)MaskSize; ++i) {
        Mask.push_back(i + MaskSize);
    if (!N->isOnlyUserOf(Sub.getNode()))
    uint64_t InsertIdx = N.getConstantOperandVal(2);
      unsigned MaxElts = std::max(NumElts, NumSubSrcBCElts);
      assert((MaxElts % NumElts) == 0 && (MaxElts % NumSubSrcBCElts) == 0 &&
             "Subvector valuetype mismatch");
      InsertIdx *= (MaxElts / NumElts);
      ExtractIdx *= (MaxElts / NumSubSrcBCElts);
      NumSubElts *= (MaxElts / NumElts);
      bool SrcIsUndef = Src.isUndef();
      for (int i = 0; i != (int)MaxElts; ++i)
      for (int i = 0; i != (int)NumSubElts; ++i)
        Mask[InsertIdx + i] = (SrcIsUndef ? 0 : MaxElts) + ExtractIdx + i;
    if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) &&
        NumBitsPerElt == 64 && NumSizeInBits == 512 &&
        Src.getOperand(0).isUndef() &&
        Src.getOperand(1).getValueType() == SubVT &&
        Src.getConstantOperandVal(2) == 0) {
      for (int i = 0; i != (int)NumSubElts; ++i)
      for (int i = 0; i != (int)NumSubElts; ++i)
        Mask.push_back(i + NumElts);
                             Depth + 1, ResolveKnownElts))
    if (SubMask.size() != NumSubElts) {
      assert(((SubMask.size() % NumSubElts) == 0 ||
              (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
      if ((NumSubElts % SubMask.size()) == 0) {
        int Scale = NumSubElts / SubMask.size();
        SubMask = ScaledSubMask;
        int Scale = SubMask.size() / NumSubElts;
        NumSubElts = SubMask.size();
    for (int i = 0; i != (int)NumElts; ++i)
    for (int i = 0; i != (int)NumSubElts; ++i) {
        int InputIdx = M / NumSubElts;
        M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
      Mask[i + InsertIdx] = M;
    unsigned DstIdx = 0;
      if (!isa<ConstantSDNode>(N.getOperand(2)) ||
          N.getConstantOperandAPInt(2).uge(NumElts))
      DstIdx = N.getConstantOperandVal(2);
      for (unsigned i = 0; i != NumElts; ++i)
    if ((MinBitsPerElt % 8) != 0)
    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
    unsigned DstByte = DstIdx * NumBytesPerElt;
      for (int i = 0; i != (int)NumSizeInBytes; ++i)
        Mask.push_back(NumSizeInBytes + i);
    unsigned MinBytesPerElts = MinBitsPerElt / 8;
    MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
    for (unsigned i = 0; i != MinBytesPerElts; ++i)
      Mask[DstByte + i] = SrcByte + i;
    for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
           "Unexpected input value type");
    APInt EltsLHS, EltsRHS;
    bool Offset0 = false, Offset1 = false;
    bool IsUnary = (N0 == N1);
    if (Offset0 || Offset1) {
        if ((Offset0 && isInRange(M, 0, NumElts)) ||
            (Offset1 && isInRange(M, NumElts, 2 * NumElts)))
    EVT SrcVT = Src.getValueType();
    unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
    assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation");
    for (unsigned i = 0; i != NumSrcElts; ++i)
      Mask.push_back(i * Scale);
    if (!Amt || (*Amt % 8) != 0)
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j] = i + j - ByteShift;
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j - ByteShift] = i + j;
    uint64_t ShiftVal = N.getConstantOperandVal(1);
    if (NumBitsPerElt <= ShiftVal) {
    if ((ShiftVal % 8) != 0)
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j] = i + j - ByteShift;
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j - ByteShift] = i + j;
    uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
    if ((RotateVal % 8) != 0)
    int Offset = RotateVal / 8;
    for (int i = 0; i != (int)NumElts; ++i) {
      int BaseIdx = i * NumBytesPerElt;
      for (int j = 0; j != (int)NumBytesPerElt; ++j) {
        Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
    if (!Src.getSimpleValueType().isVector()) {
          Src.getOperand(0).getValueType().getScalarType() !=
        Src = Src.getOperand(0);
    Mask.append(NumElts, 0);
    EVT SrcVT = Src.getValueType();
        (NumBitsPerSrcElt % 8) != 0)
    APInt DemandedSrcElts =
    assert((NumBitsPerElt % NumBitsPerSrcElt) == 0 && "Unexpected extension");
    unsigned Scale = NumBitsPerElt / NumBitsPerSrcElt;
    for (unsigned I = 0; I != NumElts; ++I)
      Mask.append(Scale, I);
    EVT SrcVT = Src.getValueType();
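// Resolve the inputs of a target shuffle: drop unused operands and remap the
// mask indices accordingly, then extract a scalar element through a shuffle.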
  int MaskWidth = Mask.size();
  for (int i = 0, e = Inputs.size(); i < e; ++i) {
    int lo = UsedInputs.size() * MaskWidth;
    int hi = lo + MaskWidth;
      if ((lo <= M) && (M < hi))
    if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
    bool IsRepeat = false;
    for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
      if (UsedInputs[j] != Inputs[i])
          M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
  Inputs = UsedInputs;

                                   bool ResolveKnownElts) {
  EVT VT = Op.getValueType();
  if (ResolveKnownElts)
                         ResolveKnownElts)) {
                                   bool ResolveKnownElts) {
  APInt KnownUndef, KnownZero;
                               KnownZero, DAG, Depth, ResolveKnownElts);
                                   bool ResolveKnownElts = true) {
  EVT VT = Op.getValueType();
  unsigned NumElts = Op.getValueType().getVectorNumElements();
         "Unknown broadcast load type");
      Opcode, DL, Tys, Ops, MemVT,

  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) {
    int Elt = SV->getMaskElt(Index);
    SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
    int Elt = ShuffleMask[Index];
    assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range");
    uint64_t SubIdx = Op.getConstantOperandVal(2);
    if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
    EVT SubVT = Op.getOperand(0).getValueType();
    uint64_t SubIdx = Index / NumSubElts;
    uint64_t SubElt = Index % NumSubElts;
    uint64_t SrcIdx = Op.getConstantOperandVal(1);
    EVT SrcVT = Src.getValueType();
      isa<ConstantSDNode>(Op.getOperand(2))) {
    if (Op.getConstantOperandAPInt(2) == Index)
      return Op.getOperand(1);
    return (Index == 0) ? Op.getOperand(0)
    return Op.getOperand(Index);
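// BUILD_VECTOR lowering helpers: insert the non-zero elements of a mostly
// zero build vector one at a time, with SSE4.1-guarded byte/dword paths and
// an INSERTPS path for v4f32.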
                                     const APInt &NonZeroMask,
                                     unsigned NumNonZero, unsigned NumZero,
  MVT VT = Op.getSimpleValueType();
          ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&
         "Illegal vector insertion");
  for (unsigned i = 0; i < NumElts; ++i) {
    bool IsNonZero = NonZeroMask[i];
      if (NumZero || 0 != i)
        assert(0 == i && "Expected insertion into zero-index");

                                       const APInt &NonZeroMask,
                                       unsigned NumNonZero, unsigned NumZero,
  if (NumNonZero > 8 && !Subtarget.hasSSE41())
  for (unsigned I = 0; I != 4; ++I) {
    if (!NonZeroMask[I])
  assert(V && "Failed to fold v16i8 vector to zero");
  for (unsigned i = V ? 4 : 0; i < 16; i += 2) {
    bool ThisIsNonZero = NonZeroMask[i];
    bool NextIsNonZero = NonZeroMask[i + 1];
    if (!ThisIsNonZero && !NextIsNonZero)
    if (ThisIsNonZero) {
      if (NumZero || NextIsNonZero)
    if (NextIsNonZero) {
      if (i == 0 && NumZero)
    if (i != 0 || NumZero)

                                       const APInt &NonZeroMask,
                                       unsigned NumNonZero, unsigned NumZero,
  if (NumNonZero > 4 && !Subtarget.hasSSE41())

  if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
      Op.getOperand(0) == Op.getOperand(2) &&
      Op.getOperand(1) == Op.getOperand(3) &&
      Op.getOperand(0) != Op.getOperand(1)) {
    MVT VT = Op.getSimpleValueType();
    SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
  std::bitset<4> Zeroable, Undefs;
  for (int i = 0; i < 4; ++i) {
  assert(Zeroable.size() - Zeroable.count() > 1 &&
         "We expect at least two non-zero elements!");
  unsigned FirstNonZeroIdx;
  for (unsigned i = 0; i < 4; ++i) {
    if (!FirstNonZero.getNode()) {
      FirstNonZeroIdx = i;
  assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
  unsigned EltMaskIdx, EltIdx;
  for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
    if (Zeroable[EltIdx]) {
      Mask[EltIdx] = EltIdx + 4;
    Elt = Op->getOperand(EltIdx);
    if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
    Mask[EltIdx] = EltIdx;
  SDValue VZeroOrUndef = (Zeroable == Undefs)
    if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
    bool CanFold = true;
    for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
  assert(V1.getNode() && "Expected at least two non-zero elements!");
  if (V2.getSimpleValueType() != MVT::v4f32)
  unsigned ZMask = Zeroable.to_ulong();
  unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");

  MVT ShVT = MVT::v16i8;
  assert(NumBits % 8 == 0 && "Only support byte sized shifts");

  EVT PVT = LD->getValueType(0);
  if (PVT != MVT::i32 && PVT != MVT::f32)
    FI = FINode->getIndex();
      isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
    FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
  SDValue Chain = LD->getChain();
  if (!InferredAlign || *InferredAlign < RequiredAlign) {
  int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1);
  int EltNo = (Offset - StartOffset) >> 2;
                           LD->getPointerInfo().getWithOffset(StartOffset));

  auto *BaseLd = cast<LoadSDNode>(Elt);
  if (!BaseLd->isSimple())
    if (auto *AmtC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
      uint64_t Amt = AmtC->getZExtValue();
        ByteOffset += Amt / 8;
    if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
      unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
      if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
        ByteOffset += Idx * (SrcSizeInBits / 8);
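// EltsFromConsecutiveLoads: merge a build vector of consecutive scalar loads
// into a single wide load, a zero-extending load, or a broadcast of a
// repeated load subsequence.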
7120 bool IsAfterLegalize) {
7124 unsigned NumElems = Elts.size();
7126 int LastLoadedElt = -1;
7136 for (unsigned i = 0; i < NumElems; ++i) {
7155 if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
7157 unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
7158 if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
7166 "Incomplete element masks");
7169 if (UndefMask.popcount() == NumElems)
7180 "Register/Memory size mismatch");
7182 assert(LDBase && "Did not find base load for merging consecutive loads");
7184 unsigned BaseSizeInBytes = BaseSizeInBits / 8;
7185 int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);
7186 int LoadSizeInBits = NumLoadedElts * BaseSizeInBits;
7187 assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
7190 if (ByteOffsets[FirstLoadedElt] != 0)
7197 int64_t ByteOffset = ByteOffsets[EltIdx];
7198 if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
7199 int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
7200 return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] &&
7201 Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
7204 EltIdx - FirstLoadedElt);
7210 bool IsConsecutiveLoad = true;
7211 bool IsConsecutiveLoadWithZeros = true;
7212 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
7214 if (!CheckConsecutiveLoad(LDBase, i)) {
7215 IsConsecutiveLoad = false;
7216 IsConsecutiveLoadWithZeros = false;
7219 } else if (ZeroMask[i]) {
7220 IsConsecutiveLoad = false;
7227 "Cannot merge volatile or atomic loads.");
7232 for (auto *LD : Loads)
7247 if (FirstLoadedElt == 0 &&
7248 (NumLoadedElts == (int)NumElems || IsDereferenceable) &&
7249 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
7260 return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
7263 return CreateLoad(VT, LDBase);
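// [Editor's illustrative sketch -- not part of the original file.] The
// consecutive-load check above accepts element EltIdx only if its byte offset
// inside its source load is a whole number of elements and maps back to the
// load that starts the run. A simplified scalar restatement of that
// arithmetic (hypothetical helper; the real code also consults LoadMask and
// the per-element Loads[] array):
static bool eltOffsetLooksConsecutive(long long ByteOffset, int EltIdx,
                                      int BaseSizeInBytes, int NumElems) {
  if (ByteOffset % BaseSizeInBytes != 0)
    return false;
  long long BaseIdx = EltIdx - ByteOffset / BaseSizeInBytes;
  return 0 <= BaseIdx && BaseIdx < NumElems;
}
// [End of editor's sketch.]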
7267 if (!IsAfterLegalize && VT.isVector()) {
7269 if ((NumMaskElts % NumElems) == 0) {
7270 unsigned Scale = NumMaskElts / NumElems;
7272 for (unsigned i = 0; i < NumElems; ++i) {
7275 int Offset = ZeroMask[i] ? NumMaskElts : 0;
7276 for (unsigned j = 0; j != Scale; ++j)
7277 ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
7279 SDValue V = CreateLoad(VT, LDBase);
7289 unsigned HalfNumElems = NumElems / 2;
7295 DAG, Subtarget, IsAfterLegalize);
7303 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
7304 ((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
7305 LoadSizeInBits == 64) &&
7312 if (!Subtarget.hasSSE2() && VT == MVT::v4f32)
7320 for (auto *LD : Loads)
7331 for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
7332 unsigned RepeatSize = SubElems * BaseSizeInBits;
7333 unsigned ScalarSize = std::min(RepeatSize, 64u);
7334 if (!Subtarget.hasAVX2() && ScalarSize < 32)
7339 if (RepeatSize > ScalarSize && SubElems == 1)
7344 for (unsigned i = 0; i != NumElems && Match; ++i) {
7348 if (RepeatedLoads[i % SubElems].isUndef())
7349 RepeatedLoads[i % SubElems] = Elt;
7351 Match &= (RepeatedLoads[i % SubElems] == Elt);
7356 Match &= !RepeatedLoads.back().isUndef();
7364 if (RepeatSize > ScalarSize)
7366 RepeatSize / ScalarSize);
7372 RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) {
7373 SDValue Broadcast = RepeatLoad;
7374 if (RepeatSize > ScalarSize) {
7402 bool IsAfterLegalize) {
7421 auto getConstantScalar = [&](const APInt &Val) -> Constant * {
7423 if (ScalarSize == 16)
7425 if (ScalarSize == 32)
7427 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7434 for (unsigned I = 0, E = Bits.size(); I != E; ++I)
7436 : getConstantScalar(Bits[I]));
7445 auto getConstantScalar = [&](const APInt &Val) -> Constant * {
7447 if (ScalarSize == 16)
7449 if (ScalarSize == 32)
7451 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7457 if (ScalarSize == SplatBitSize)
7458 return getConstantScalar(SplatValue);
7460 unsigned NumElm = SplatBitSize / ScalarSize;
7462 for (unsigned I = 0; I != NumElm; ++I) {
7464 ConstantVec.push_back(getConstantScalar(Val));
7470 for (auto *U : N->users()) {
7471 unsigned Opc = U->getOpcode();
7481 if (N->hasOneUse()) {
7513 "Unsupported vector type for broadcast.");
7520 assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
7521 if (Sequence.size() == 1)
7531 if (!Sequence.empty() && Subtarget.hasCDI()) {
7533 unsigned SeqLen = Sequence.size();
7534 bool UpperZeroOrUndef =
7539 if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) ||
7544 : Op0.getOperand(0).getOperand(0);
7547 if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) ||
7548 (EltType == MVT::i32 && MaskVT == MVT::v16i1)) {
7562 unsigned NumUndefElts = UndefElements.count();
7563 if (!Ld || (NumElts - NumUndefElts) <= 1) {
7564 APInt SplatValue, Undef;
7565 unsigned SplatBitSize;
7568 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
7578 if (SplatBitSize == 32 || SplatBitSize == 64 ||
7579 (SplatBitSize < 32 && Subtarget.hasAVX2())) {
7586 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
7596 if (SplatBitSize > 64) {
7602 Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
7608 Ops, VVT, MPI, Alignment,
7618 if (!Ld || NumElts - NumUndefElts != 1)
7621 if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
7625 bool ConstSplatVal =
7653 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
7661 if (ScalarSize == 32 ||
7662 (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
7663 (CVT == MVT::f16 && Subtarget.hasAVX2()) ||
7664 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
7667 C = CI->getConstantIntValue();
7669 C = CF->getConstantFPValue();
7671 assert(C && "Invalid constant type");
7675 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
7688 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
7699 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
7700 (Subtarget.hasVLX() && ScalarSize == 64)) {
7701 auto *LN = cast<LoadSDNode>(Ld);
7703 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7706 LN->getMemoryVT(), LN->getMemOperand());
7714 (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) {
7715 auto *LN = cast<LoadSDNode>(Ld);
7717 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7720 LN->getMemoryVT(), LN->getMemOperand());
7725 if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
7740 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
7761 ExtractedFromVec = ShuffleVec;
7769 MVT VT = Op.getSimpleValueType();
7782 for (unsigned i = 0; i != NumElems; ++i) {
7783 unsigned Opc = Op.getOperand(i).getOpcode();
7790 if (InsertIndices.size() > 1)
7797 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
7798 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
7801 if (!isa<ConstantSDNode>(ExtIdx))
7810 VecIn1 = ExtractedFromVec;
7811 else if (VecIn1 != ExtractedFromVec) {
7813 VecIn2 = ExtractedFromVec;
7814 else if (VecIn2 != ExtractedFromVec)
7819 if (ExtractedFromVec == VecIn1)
7821 else if (ExtractedFromVec == VecIn2)
7822 Mask[i] = Idx + NumElems;
7831 for (unsigned Idx : InsertIndices)
7841 MVT VT = Op.getSimpleValueType();
7857 MVT VT = Op.getSimpleValueType();
7859 "Unexpected type in LowerBUILD_VECTORvXi1!");
7866 bool IsSplat = true;
7867 bool HasConstElts = false;
7873 if (auto *InC = dyn_cast<ConstantSDNode>(In)) {
7874 Immediate |= (InC->getZExtValue() & 0x1) << idx;
7875 HasConstElts = true;
7881 else if (In != Op.getOperand(SplatIdx))
7892 assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
7898 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7919 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7936 for (unsigned InsertIdx : NonConstIdx) {
7938 Op.getOperand(InsertIdx),
7979 unsigned BaseIdx, unsigned LastIdx,
7981 EVT VT = N->getValueType(0);
7983 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
7985 "Invalid Vector in input!");
7988 bool CanFold = true;
7989 unsigned ExpectedVExtractIdx = BaseIdx;
7990 unsigned NumElts = LastIdx - BaseIdx;
7995 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
7999 if (Op->isUndef()) {
8001 if (i * 2 == NumElts)
8002 ExpectedVExtractIdx = BaseIdx;
8003 ExpectedVExtractIdx += 2;
8007 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
8028 if (i * 2 < NumElts) {
8040 if (i * 2 == NumElts)
8041 ExpectedVExtractIdx = BaseIdx;
8045 if (I0 == ExpectedVExtractIdx)
8047 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
8054 ExpectedVExtractIdx += 2;
8093 unsigned X86Opcode, bool Mode,
8094 bool isUndefLO, bool isUndefHI) {
8097 "Invalid nodes in input!");
8111 if (!isUndefLO && !V0->isUndef())
8112 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
8113 if (!isUndefHI && !V1->isUndef())
8114 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
8118 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
8121 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
8135 unsigned &NumExtracts,
8152 unsigned Opc[2] = {0, 0};
8153 for (unsigned i = 0, e = NumElts; i != e; ++i) {
8157 unsigned Opcode = Op.getOpcode();
8183 if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
8185 Opc[i % 2] = Opcode;
8222 if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
8262 unsigned ExpectedUses) {
8292 unsigned NumExtracts;
8304 return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
8317 Mask.push_back(I + E + 1);
8341 unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
8342 unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
8343 for (unsigned i = 0; i != Num128BitChunks; ++i) {
8344 for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
8356 GenericOpcode = Op.getOpcode();
8357 switch (GenericOpcode) {
8363 default: return false;
8374 !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
8379 if (j < NumEltsIn64Bits) {
8387 SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
8394 unsigned ExpectedIndex = i * NumEltsIn128Bits +
8395 (j % NumEltsIn64Bits) * 2;
8396 if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
8405 if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
8436 for (unsigned i = 0; i != NumElts; ++i)
8441 unsigned HalfNumElts = NumElts / 2;
8450 return DAG.getNode(HOpcode, DL, VT, V0, V1);
8458 unsigned NumNonUndefs =
8460 if (NumNonUndefs < 2)
8467 if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
8468 ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
8469 ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
8470 ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
8483 unsigned Half = NumElts / 2;
8484 unsigned NumUndefsLO = 0;
8485 unsigned NumUndefsHI = 0;
8486 for (unsigned i = 0, e = Half; i != e; ++i)
8490 for (unsigned i = Half, e = NumElts; i != e; ++i)
8495 if (VT == MVT::v8i32 || VT == MVT::v16i16) {
8498 bool CanFold = true;
8519 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
8528 bool isUndefLO = NumUndefsLO == Half;
8529 bool isUndefHI = NumUndefsHI == Half;
8535 if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
8536 VT == MVT::v16i16) {
8555 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
8560 bool isUndefLO = NumUndefsLO == Half;
8561 bool isUndefHI = NumUndefsHI == Half;
8563 isUndefLO, isUndefHI);
8581 MVT VT = Op->getSimpleValueType(0);
8587 unsigned Opcode = Op->getOperand(0).getOpcode();
8588 for (unsigned i = 1; i < NumElems; ++i)
8589 if (Opcode != Op->getOperand(i).getOpcode())
8593 bool IsShift = false;
8607 if (Op->getSplatValue())
8620 if (!isa<ConstantSDNode>(RHS))
8637 if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
8658 MVT VT = Op.getSimpleValueType();
8668 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
8690 "Illegal variable permute mask size");
8698 SDLoc(IndicesVec), SizeInBits);
8702 IndicesVT, IndicesVec);
8714 Subtarget, DAG, SDLoc(IndicesVec));
8729 EVT SrcVT = Idx.getValueType();
8739 for (uint64_t i = 0; i != Scale; ++i) {
8740 IndexScale |= Scale << (i * NumDstBits);
8741 IndexOffset |= i << (i * NumDstBits);
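// [Editor's illustrative sketch -- not part of the original file.] The
// IndexScale/IndexOffset constants built above implement, per lane, the
// rewrite "element index i of a Scale-times-wider element becomes the narrow
// indices i*Scale+0 .. i*Scale+Scale-1". A scalar restatement of that mapping
// (hypothetical helper, plain C++):
static void scaleShuffleIndex(unsigned EltIdx, unsigned Scale,
                              unsigned *NarrowIdx /* Scale entries */) {
  for (unsigned j = 0; j != Scale; ++j)
    NarrowIdx[j] = EltIdx * Scale + j; // j-th sub-element of element EltIdx
}
// [End of editor's sketch.]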
8751 unsigned Opcode = 0;
8760 if (Subtarget.hasVLX() && Subtarget.hasBWI())
8764 ShuffleVT = MVT::v16i8;
8769 if (Subtarget.hasAVX()) {
8771 ShuffleVT = MVT::v4f32;
8774 ShuffleVT = MVT::v16i8;
8779 if (Subtarget.hasAVX()) {
8783 ShuffleVT = MVT::v2f64;
8789 DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),
8795 if (Subtarget.hasVLX() && Subtarget.hasVBMI())
8797 else if (Subtarget.hasXOP()) {
8806 } else if (Subtarget.hasAVX()) {
8817 EVT VT = Idx.getValueType();
8823 SDValue Ops[] = {LoLo, HiHi, IndicesVec};
8829 if (Subtarget.hasVLX() && Subtarget.hasBWI())
8831 else if (Subtarget.hasAVX()) {
8833 IndicesVec = ScaleIndices(IndicesVec, 2);
8836 MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec),
8837 DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget));
8842 if (Subtarget.hasAVX2())
8844 else if (Subtarget.hasAVX()) {
8847 {0, 1, 2, 3, 0, 1, 2, 3});
8849 {4, 5, 6, 7, 4, 5, 6, 7});
8850 if (Subtarget.hasXOP())
8866 if (Subtarget.hasAVX512()) {
8867 if (!Subtarget.hasVLX()) {
8869 SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG,
8871 IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget,
8872 DAG, SDLoc(IndicesVec));
8878 } else if (Subtarget.hasAVX()) {
8886 if (Subtarget.hasXOP())
8901 if (Subtarget.hasVBMI())
8905 if (Subtarget.hasBWI())
8912 if (Subtarget.hasAVX512())
8921 "Illegal variable permute shuffle type");
8925 IndicesVec = ScaleIndices(IndicesVec, Scale);
8928 IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);
8932 ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec)
8933 : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec);
8956 for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
8965 SrcVec = Op.getOperand(0);
8966 else if (SrcVec != Op.getOperand(0))
8968 SDValue ExtractedIndex = Op->getOperand(1);
8972 ExtractedIndex = ExtractedIndex.getOperand(0);
8981 else if (IndicesVec != ExtractedIndex.getOperand(0))
8984 auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
8985 if (!PermIdx || PermIdx->getAPIntValue() != Idx)
8989 MVT VT = V.getSimpleValueType();
8997 MVT VT = Op.getSimpleValueType();
8999 MVT OpEltVT = Op.getOperand(0).getSimpleValueType();
9007 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
9018 bool IsAllConstants = true;
9019 bool OneUseFrozenUndefs = true;
9021 unsigned NumConstants = NumElems;
9022 for (unsigned i = 0; i < NumElems; ++i) {
9029 OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();
9030 FrozenUndefMask.setBit(i);
9035 IsAllConstants = false;
9050 if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
9054 if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
9062 if (unsigned NumFrozenUndefElts = FrozenUndefMask.popcount();
9063 NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
9066 for (unsigned i = 0; i < NumElems; ++i) {
9072 if (!FrozenUndefMask[i])
9073 Elts[i] = Op.getOperand(i);
9075 BlendMask[i] += NumElems;
9090 unsigned UpperElems = NumElems / 2;
9091 APInt UndefOrZeroMask = FrozenUndefMask | UndefMask | ZeroMask;
9092 unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countl_one();
9093 if (NumUpperUndefsOrZeros >= UpperElems) {
9095 NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
9096 UpperElems = NumElems - (NumElems / 4);
9098 bool UndefUpper = UndefMask.countl_one() >= UpperElems;
9102 return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
9109 return HorizontalOp;
9115 unsigned NumZero = ZeroMask.popcount();
9116 unsigned NumNonZero = NonZeroMask.popcount();
9124 if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
9125 FrozenUndefMask.isZero() &&
9132 Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
9136 for (unsigned i = 0; i != NumElems; ++i) {
9138 if (auto *C = dyn_cast<ConstantSDNode>(Elt))
9139 ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
9140 else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
9141 ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
9144 "Expected one variable element in this vector");
9158 SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
9164 if (InsertC < NumEltsInLow128Bits)
9170 assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
9173 for (unsigned i = 0; i != NumElts; ++i)
9174 ShuffleMask.push_back(i == InsertC ? NumElts : i);
9180 if (NumNonZero == 1) {
9192 if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
9193 EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
9194 (EltVT == MVT::i16 && Subtarget.hasFP16())) {
9197 "Expected an SSE value type!");
9206 if (EltVT == MVT::i16 || EltVT == MVT::i8) {
9216 if (NumElems == 2 && Idx == 1 &&
9222 VT, Op.getOperand(1)),
9223 NumBits/2, DAG, *this, dl);
9234 if (EVTBits == 32) {
9241 if (Values.size() == 1) {
9242 if (EVTBits == 32) {
9249 if (Op.getNode()->isOnlyUserOf(Item.getNode()))
9274 if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
9275 SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
9279 for (unsigned i = 2; i != NumElems; ++i)
9280 if (Ops[i % 2] != Op.getOperand(i))
9284 if (CanSplat(Op, NumElems, Ops)) {
9306 HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));
9313 if (EVTBits == 64) {
9314 if (NumNonZero == 1) {
9318 Op.getOperand(Idx));
9325 if (EVTBits == 8 && NumElems == 16)
9327 NumZero, DAG, Subtarget))
9330 if (EltVT == MVT::i16 && NumElems == 8)
9332 NumZero, DAG, Subtarget))
9336 if (EVTBits == 32 && NumElems == 4)
9341 if (NumElems == 4 && NumZero > 0) {
9343 for (unsigned i = 0; i < 4; ++i) {
9344 bool isZero = !NonZeroMask[i];
9351 for (unsigned i = 0; i < 2; ++i) {
9358 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
9361 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
9364 Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
9374 static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
9375 static_cast<int>(Reverse2 ? NumElems : NumElems+1)
9380 assert(Values.size() > 1 && "Expected non-undef and non-splat vector");
9387 if (Subtarget.hasSSE41() && EltVT != MVT::f16) {
9389 if (!Op.getOperand(0).isUndef())
9394 for (unsigned i = 1; i < NumElems; ++i) {
9395 if (Op.getOperand(i).isUndef()) continue;
9406 for (unsigned i = 0; i < NumElems; ++i) {
9407 if (!Op.getOperand(i).isUndef())
9417 for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
9420 for (unsigned i = 0; i != Scale; ++i)
9422 for (unsigned i = 0; i != Scale; ++i)
9423 Mask.push_back(NumElems+i);
9426 for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
9438 MVT ResVT = Op.getSimpleValueType();
9441 ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
9444 unsigned NumFreezeUndef = 0;
9445 unsigned NumZero = 0;
9446 unsigned NumNonZero = 0;
9447 unsigned NonZeros = 0;
9448 for (unsigned i = 0; i != NumOperands; ++i) {
9462 assert(i < sizeof(NonZeros) * CHAR_BIT);
9469 if (NumNonZero > 2) {
9473 Ops.slice(0, NumOperands/2));
9475 Ops.slice(NumOperands/2));
9484 MVT SubVT = Op.getOperand(0).getSimpleValueType();
9486 for (unsigned i = 0; i != NumOperands; ++i) {
9487 if ((NonZeros & (1 << i)) == 0)
9505 MVT ResVT = Op.getSimpleValueType();
9509 "Unexpected number of operands in CONCAT_VECTORS");
9513 for (unsigned i = 0; i != NumOperands; ++i) {
9517 assert(i < sizeof(NonZeros) * CHAR_BIT);
9529 if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
9530 Log2_64(NonZeros) != NumOperands - 1) {
9554 if (NumOperands > 2) {
9558 Ops.slice(0, NumOperands / 2));
9560 Ops.slice(NumOperands / 2));
9579 MVT VT = Op.getSimpleValueType();
9613 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
9614 assert(Mask[i] >= -1 && "Out of bound mask element!");
9615 if (Mask[i] >= 0 && Mask[i] != i)
9627 unsigned ScalarSizeInBits,
9629 assert(LaneSizeInBits && ScalarSizeInBits &&
9630 (LaneSizeInBits % ScalarSizeInBits) == 0 &&
9631 "Illegal shuffle lane size");
9632 int LaneSize = LaneSizeInBits / ScalarSizeInBits;
9633 int Size = Mask.size();
9634 for (int i = 0; i < Size; ++i)
9635 if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
9650 unsigned ScalarSizeInBits,
9652 assert(LaneSizeInBits && ScalarSizeInBits &&
9653 (LaneSizeInBits % ScalarSizeInBits) == 0 &&
9654 "Illegal shuffle lane size");
9655 int NumElts = Mask.size();
9656 int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits;
9657 int NumLanes = NumElts / NumEltsPerLane;
9659 for (int i = 0; i != NumLanes; ++i) {
9661 for (int j = 0; j != NumEltsPerLane; ++j) {
9662 int M = Mask[(i * NumEltsPerLane) + j];
9665 int Lane = (M % NumElts) / NumEltsPerLane;
9666 if (SrcLane >= 0 && SrcLane != Lane)
9690 RepeatedMask.assign(LaneSize, -1);
9691 int Size = Mask.size();
9692 for (int i = 0; i < Size; ++i) {
9696 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
9702 int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
9703 : Mask[i] % LaneSize + LaneSize;
9704 if (RepeatedMask[i % LaneSize] < 0)
9706 RepeatedMask[i % LaneSize] = LocalM;
9707 else if (RepeatedMask[i % LaneSize] != LocalM)
9737 unsigned EltSizeInBits,
9740 int LaneSize = LaneSizeInBits / EltSizeInBits;
9742 int Size = Mask.size();
9743 for (int i = 0; i < Size; ++i) {
9753 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
9759 int LaneM = Mask[i] / Size;
9760 int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize);
9763 RepeatedMask[i % LaneSize] = LocalM;
9764 else if (RepeatedMask[i % LaneSize] != LocalM)
9777 Mask, RepeatedMask);
9783 int Idx, int ExpectedIdx) {
9784 assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
9785 ExpectedIdx < MaskSize && "Out of range element index");
9786 if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
9789 switch (Op.getOpcode()) {
9801 return (Op == ExpectedOp &&
9802 (int)Op.getValueType().getVectorNumElements() == MaskSize);
9812 if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
9813 MVT VT = Op.getSimpleValueType();
9815 if (MaskSize == NumElts) {
9817 int NumEltsPerLane = NumElts / NumLanes;
9818 int NumHalfEltsPerLane = NumEltsPerLane / 2;
9820 (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
9822 (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
9823 return SameLane && SameElt;
9845 int Size = Mask.size();
9846 if (Size != (int)ExpectedMask.size())
9849 for (int i = 0; i < Size; ++i) {
9850 assert(Mask[i] >= -1 && "Out of bound mask element!");
9851 int MaskIdx = Mask[i];
9852 int ExpectedIdx = ExpectedMask[i];
9853 if (0 <= MaskIdx && MaskIdx != ExpectedIdx) {
9856 MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
9857 ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9879 int Size = Mask.size();
9880 if (Size != (int)ExpectedMask.size())
9884 "Illegal target shuffle mask");
9892 !V1.getValueType().isVector()))
9895 !V2.getValueType().isVector()))
9901 for (int i = 0; i < Size; ++i) {
9902 int MaskIdx = Mask[i];
9903 int ExpectedIdx = ExpectedMask[i];
9913 int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9914 APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2;
9922 MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
9923 ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9937 if (VT != MVT::v8i32 && VT != MVT::v8f32)
9948 return IsUnpackwdMask;
9962 for (unsigned i = 0; i != 4; ++i) {
9977 assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
9978 unsigned HalfSize = Mask.size() / 2;
9979 for (unsigned i = 0; i != HalfSize; ++i) {
9980 if (Mask[i] != Mask[i + HalfSize])
9995 assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
9996 assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
9997 assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
9998 assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
9999 assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
10003 int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
10004 assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask");
10006 int FirstElt = Mask[FirstIndex];
10007 if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }))
10008 return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt;
10011 Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
10012 Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
10013 Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4;
10014 Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6;
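// [Editor's illustrative sketch -- not part of the original file.] The loop
// above packs a 4-element shuffle mask into the 8-bit immediate used by
// PSHUFD/SHUFPS/VPERMILPS: two bits per destination element, element 0 in
// bits [1:0] through element 3 in bits [7:6], with undef slots defaulting to
// their own index. A standalone equivalent (hypothetical helper):
static unsigned encodeShuffleImm4(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 0; i != 4; ++i)
    Imm |= unsigned(Mask[i] < 0 ? i : Mask[i]) << (2 * i);
  return Imm;
}
// For example, the identity mask {0,1,2,3} encodes as 0xE4 and a broadcast of
// element 0 ({0,0,0,0}) encodes as 0x00.
// [End of editor's sketch.]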
10026 assert((Mask.size() == 2 || Mask.size() == 4 || Mask.size() == 8) &&
10027 "Unexpected SHUFPD mask size");
10028 assert(all_of(Mask, [](int M) { return -1 <= M && M <= 1; }) &&
10029 "Unexpected SHUFPD mask elements");
10033 int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
10034 assert(0 <= FirstIndex && FirstIndex < (int)Mask.size() &&
10035 "All undef shuffle mask");
10037 int FirstElt = Mask[FirstIndex];
10038 if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }) &&
10039 count_if(Mask, [FirstElt](int M) { return M == FirstElt; }) > 1) {
10041 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10042 Imm |= FirstElt << I;
10049 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10050 Imm |= (Mask[I] < 0 ? (I & 1) : Mask[I]) << I;
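// [Editor's illustrative sketch -- not part of the original file.] Unlike the
// 2-bit-per-element PSHUFD immediate, the SHUFPD immediate built above uses
// one bit per f64 lane: bit I selects element 0 or 1 of the source feeding
// destination lane I, and undef lanes default to the lane's own low/high
// position (I & 1). A standalone equivalent (hypothetical helper):
static unsigned encodeSHUFPDImm(const int *Mask, unsigned NumElts) {
  unsigned Imm = 0;
  for (unsigned I = 0; I != NumElts; ++I)
    Imm |= unsigned(Mask[I] < 0 ? (I & 1) : Mask[I]) << I;
  return Imm;
}
// [End of editor's sketch.]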
10069 bool &IsZeroSideLeft) {
10070 int NextElement = -1;
10072 for (int i = 0, e = Mask.size(); i < e; i++) {
10074 assert(Mask[i] >= -1 && "Out of bound mask element!");
10080 if (NextElement < 0) {
10081 NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
10082 IsZeroSideLeft = NextElement != 0;
10085 if (NextElement != Mask[i])
10098 int Size = Mask.size();
10112 for (int i = 0; i < NumBytes; ++i) {
10113 int M = Mask[i / NumEltBytes];
10115 PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
10118 if (Zeroable[i / NumEltBytes]) {
10119 PSHUFBMask[i] = ZeroMask;
10125 if (V && V != SrcV)
10131 if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
10135 M = M * NumEltBytes + (i % NumEltBytes);
10138 assert(V && "Failed to find a source input");
10153 const APInt &Zeroable,
10156 bool IsLeftZeroSide = true;
10160 unsigned VEXPANDMask = (~Zeroable).getZExtValue();
10165 assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
10166 "Unexpected number of vector elements");
10168 Subtarget, DAG, DL);
10170 SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
10175 unsigned &UnpackOpcode, bool IsUnary,
10181 bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
10182 for (int i = 0; i != NumElts; i += 2) {
10183 int M1 = TargetMask[i + 0];
10184 int M2 = TargetMask[i + 1];
10190 assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
10191 "Zeroable shuffle detected");
10197 (IsUnary ? V1 : V2))) {
10199 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
10200 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
10206 (IsUnary ? V1 : V2))) {
10208 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
10209 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
10214 if (IsUnary && (Zero1 || Zero2)) {
10216 if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
10220 bool MatchLo = true, MatchHi = true;
10221 for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
10222 int M = TargetMask[i];
10225 if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
10229 MatchLo &= (M == Unpckl[i]);
10230 MatchHi &= (M == Unpckh[i]);
10233 if (MatchLo || MatchHi) {
10297 unsigned UnpackOpcode;
10309 DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3});
10311 return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);
10322 unsigned NumElts = Mask.size();
10324 unsigned MaxScale = 64 / EltSizeInBits;
10326 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10327 unsigned SrcEltBits = EltSizeInBits * Scale;
10328 if (SrcEltBits < 32 && !Subtarget.hasBWI())
10330 unsigned NumSrcElts = NumElts / Scale;
10333 unsigned UpperElts = NumElts - NumSrcElts;
10339 if ((NumSrcElts * EltSizeInBits) >= 128) {
10357 MVT SrcVT = Src.getSimpleValueType();
10367 if (NumSrcElts == NumDstElts)
10370 if (NumSrcElts > NumDstElts) {
10376 if ((NumSrcElts * DstEltSizeInBits) >= 128) {
10393 if (DstVT != TruncVT)
10417 const APInt &Zeroable,
10420 assert((VT == MVT::v16i8 || VT == MVT::v8i16) &&
"Unexpected VTRUNC type");
10426 unsigned MaxScale = 64 / EltSizeInBits;
10427 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10428 unsigned SrcEltBits = EltSizeInBits * Scale;
10429 unsigned NumSrcElts = NumElts / Scale;
10430 unsigned UpperElts = NumElts - NumSrcElts;
10439 Src.getScalarValueSizeInBits() == SrcEltBits) {
10440 Src = Src.getOperand(0);
10441 }
else if (Subtarget.hasVLX()) {
10454 if (!Subtarget.hasBWI() && Src.getScalarValueSizeInBits() < 32)
10467 const APInt &Zeroable,
10471 "Unexpected VTRUNC type");
10477 unsigned MaxScale = 64 / EltSizeInBits;
10478 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10480 unsigned SrcEltBits = EltSizeInBits * Scale;
10481 if (SrcEltBits < 32 && !Subtarget.hasBWI())
10486 unsigned NumHalfSrcElts = NumElts / Scale;
10487 unsigned NumSrcElts = 2 * NumHalfSrcElts;
10494 unsigned UpperElts = NumElts - NumSrcElts;
10495 if (UpperElts > 0 &&
10506 return Lo.getOperand(0) ==
Hi.getOperand(0);
10509 auto *LDLo = cast<LoadSDNode>(
Lo);
10510 auto *LDHi = cast<LoadSDNode>(
Hi);
10512 LDHi, LDLo,
Lo.getValueType().getStoreSize(), 1);
10570 bool IsSingleInput) {
10573 int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
10575 "We should only be called with masks with a power-of-2 size!");
10578 int Offset = MatchEven ? 0 : 1;
10583 bool ViableForN[3] = {
true,
true,
true};
10585 for (
int i = 0, e = Mask.size(); i < e; ++i) {
10591 bool IsAnyViable =
false;
10592 for (
unsigned j = 0; j != std::size(ViableForN); ++j)
10593 if (ViableForN[j]) {
10598 IsAnyViable =
true;
10600 ViableForN[j] =
false;
10607 for (
unsigned j = 0; j != std::size(ViableForN); ++j)
10623 unsigned MaxStages = 1) {
10626 assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
10627 "Illegal maximum compaction");
10630 unsigned NumSrcBits = PackVT.getScalarSizeInBits();
10631 unsigned NumPackedBits = NumSrcBits - BitSize;
10635 unsigned NumBits2 = N2.getScalarValueSizeInBits();
10638 if ((!N1.
isUndef() && !IsZero1 && NumBits1 != NumSrcBits) ||
10639 (!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits))
10641 if (Subtarget.
hasSSE41() || BitSize == 8) {
10654 if ((N1.
isUndef() || IsZero1 || IsAllOnes1 ||
10656 (N2.isUndef() || IsZero2 || IsAllOnes2 ||
10668 for (
unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
10676 if (MatchPACK(V1, V2, PackVT))
10683 if (MatchPACK(V1, V1, PackVT))
10695 unsigned PackOpcode;
10698 unsigned MaxStages =
Log2_32(64 / EltBits);
10700 Subtarget, MaxStages))
10704 unsigned NumStages =
Log2_32(CurrentEltBits / EltBits);
10707 if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
10712 unsigned MaxPackBits = 16;
10713 if (CurrentEltBits > 16 &&
10719 for (
unsigned i = 0; i != NumStages; ++i) {
10720 unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
10721 unsigned NumSrcElts = SizeBits / SrcEltBits;
10729 CurrentEltBits /= 2;
10732 "Failed to lower compaction shuffle");
10742 const APInt &Zeroable,
10749 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
10755 if (EltVT == MVT::f32 || EltVT == MVT::f64) {
10768 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
10771 if (Mask[i] %
Size != i)
10774 V = Mask[i] <
Size ? V1 : V2;
10775 else if (V != (Mask[i] <
Size ? V1 : V2))
10803 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
10804 if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i +
Size)
10820 const APInt &Zeroable, bool &ForceV1Zero,
10821 bool &ForceV2Zero, uint64_t &BlendMask) {
10822 bool V1IsZeroOrUndef =
10824 bool V2IsZeroOrUndef =
10828 ForceV1Zero = false, ForceV2Zero = false;
10829 assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
10831 int NumElts = Mask.size();
10833 int NumEltsPerLane = NumElts / NumLanes;
10834 assert((NumLanes * NumEltsPerLane) == NumElts && "Value type mismatch");
10838 bool ForceWholeLaneMasks =
10843 for (int Lane = 0; Lane != NumLanes; ++Lane) {
10845 bool LaneV1InUse = false;
10846 bool LaneV2InUse = false;
10848 for (int LaneElt = 0; LaneElt != NumEltsPerLane; ++LaneElt) {
10849 int Elt = (Lane * NumEltsPerLane) + LaneElt;
10853 if (M == Elt || (0 <= M && M < NumElts &&
10856 LaneV1InUse = true;
10859 if (M == (Elt + NumElts) ||
10862 LaneBlendMask |= 1ull << LaneElt;
10863 Mask[Elt] = Elt + NumElts;
10864 LaneV2InUse = true;
10867 if (Zeroable[Elt]) {
10868 if (V1IsZeroOrUndef) {
10869 ForceV1Zero = true;
10871 LaneV1InUse = true;
10874 if (V2IsZeroOrUndef) {
10875 ForceV2Zero = true;
10876 LaneBlendMask |= 1ull << LaneElt;
10877 Mask[Elt] = Elt + NumElts;
10878 LaneV2InUse = true;
10888 if (ForceWholeLaneMasks && LaneV2InUse && !LaneV1InUse)
10889 LaneBlendMask = (1ull << NumEltsPerLane) - 1;
10891 BlendMask |= LaneBlendMask << (Lane * NumEltsPerLane);
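// [Editor's illustrative sketch -- not part of the original file.] The loop
// above builds the per-element blend bitmask consumed by BLENDPS/PBLENDW-style
// selects: bit i set means destination element i is taken from the second
// source, clear means it comes from the first. A simplified standalone
// version that ignores the zeroable and whole-lane special cases
// (hypothetical helper):
static bool matchSimpleBlendMask(const int *Mask, int NumElts,
                                 unsigned long long &BlendMask) {
  BlendMask = 0;
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                 // undef: either source works
    if (M == i + NumElts)
      BlendMask |= 1ull << i;   // element i comes from the second source
    else if (M != i)
      return false;             // not expressible as a per-element blend
  }
  return true;
}
// [End of editor's sketch.]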
10904 const APInt &Zeroable,
10908 bool ForceV1Zero = false, ForceV2Zero = false;
10925 assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
10929 assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
10936 assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
10939 case MVT::v16i16: {
10940 assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
10944 assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
10946 for (int i = 0; i < 8; ++i)
10947 if (RepeatedMask[i] >= 8)
10948 BlendMask |= 1ull << i;
10955 uint64_t LoMask = BlendMask & 0xFF;
10956 uint64_t HiMask = (BlendMask >> 8) & 0xFF;
10957 if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
10963 MVT::v16i16, DL, Lo, Hi,
10964 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
10969 assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
10972 assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
10979 if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
10986 if (Subtarget.hasVLX())
11019 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i)
11020 for (
int j = 0; j < Scale; ++j)
11067 bool ImmBlends =
false) {
11073 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
11077 assert(Mask[i] <
Size * 2 &&
"Shuffle input is out of bounds.");
11079 if (BlendMask[Mask[i] %
Size] < 0)
11080 BlendMask[Mask[i] %
Size] = Mask[i];
11081 else if (BlendMask[Mask[i] %
Size] != Mask[i])
11084 PermuteMask[i] = Mask[i] %
Size;
11106 int NumElts = Mask.size();
11108 int NumLaneElts = NumElts / NumLanes;
11109 int NumHalfLaneElts = NumLaneElts / 2;
11111 bool MatchLo =
true, MatchHi =
true;
11115 for (
int Elt = 0; Elt != NumElts; ++Elt) {
11123 if (M < NumElts && (
Op.isUndef() ||
Op == V1))
11125 else if (NumElts <= M && (
Op.isUndef() ||
Op == V2)) {
11131 bool MatchLoAnyLane =
false, MatchHiAnyLane =
false;
11132 for (
int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
11133 int Lo = Lane, Mid = Lane + NumHalfLaneElts,
Hi = Lane + NumLaneElts;
11136 if (MatchLoAnyLane || MatchHiAnyLane) {
11137 assert((MatchLoAnyLane ^ MatchHiAnyLane) &&
11138 "Failed to match UNPCKLO/UNPCKHI");
11142 MatchLo &= MatchLoAnyLane;
11143 MatchHi &= MatchHiAnyLane;
11144 if (!MatchLo && !MatchHi)
11147 assert((MatchLo ^ MatchHi) &&
"Failed to match UNPCKLO/UNPCKHI");
11153 for (
int Elt = 0; Elt != NumElts; ++Elt) {
11160 bool IsFirstOp = M < NumElts;
11162 NumLaneElts * (NormM / NumLaneElts) + (2 * (NormM % NumHalfLaneElts));
11163 if ((IsFirstOp && V1 == Ops[0]) || (!IsFirstOp && V2 == Ops[0]))
11164 PermuteMask[Elt] = BaseMaskElt;
11165 else if ((IsFirstOp && V1 == Ops[1]) || (!IsFirstOp && V2 == Ops[1]))
11166 PermuteMask[Elt] = BaseMaskElt + 1;
11167 assert(PermuteMask[Elt] != -1 &&
11168 "Input mask element is defined but failed to assign permute mask");
11190 int Size = Mask.size();
11191 assert(Mask.size() >= 2 && "Single element masks are invalid.");
11202 bool UnpackLo = NumLoInputs >= NumHiInputs;
11204 auto TryUnpack = [&](int ScalarSize, int Scale) {
11208 for (int i = 0; i < Size; ++i) {
11213 int UnpackIdx = i / Scale;
11217 if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
11223 VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
11246 UnpackVT, V1, V2));
11252 for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
11253 if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
11264 if (NumLoInputs == 0 || NumHiInputs == 0) {
11265 assert((NumLoInputs > 0 || NumHiInputs > 0) && "We have to have *some* inputs!");
11267 int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
11275 for (int i = 0; i < Size; ++i) {
11279 assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
11282 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
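// [Editor's illustrative sketch -- not part of the original file.] The index
// arithmetic above reflects how UNPCKL/UNPCKH interleave their sources within
// a 128-bit lane: for the low unpack, destination element 2*i comes from
// element i of the first source and 2*i+1 from element i of the second. A
// standalone mask builder (hypothetical helper, single 128-bit register):
static void buildUnpackLoMask(int NumElts, int *Mask /* NumElts entries */) {
  // Indices 0..NumElts-1 name the first source, NumElts..2*NumElts-1 the second.
  for (int i = 0; i != NumElts / 2; ++i) {
    Mask[2 * i + 0] = i;            // from the first source
    Mask[2 * i + 1] = NumElts + i;  // from the second source
  }
}
// [End of editor's sketch.]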
11311 int NumEltsPerLane = NumElts / NumLanes;
11314 bool Blend1 =
true;
11315 bool Blend2 =
true;
11316 std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
11317 std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
11318 for (
int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
11319 for (
int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
11320 int M = Mask[Lane + Elt];
11324 Blend1 &= (M == (Lane + Elt));
11325 assert(Lane <= M && M < (Lane + NumEltsPerLane) &&
"Out of range mask");
11326 M = M % NumEltsPerLane;
11327 Range1.first = std::min(Range1.first, M);
11328 Range1.second = std::max(Range1.second, M);
11331 Blend2 &= (M == (Lane + Elt));
11332 assert(Lane <= M && M < (Lane + NumEltsPerLane) &&
"Out of range mask");
11333 M = M % NumEltsPerLane;
11334 Range2.first = std::min(Range2.first, M);
11335 Range2.second = std::max(Range2.second, M);
11343 if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
11344 !(0 <= Range2.first && Range2.second < NumEltsPerLane))
11358 for (
int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
11359 for (
int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
11360 int M = Mask[Lane + Elt];
11364 PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
11366 PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
11373 if (Range2.second < Range1.first)
11374 return RotateAndPermute(V1, V2, Range1.first, 0);
11375 if (Range1.second < Range2.first)
11376 return RotateAndPermute(V2, V1, Range2.first, NumElts);
11390 size_t NumUndefs = 0;
11391 std::optional<int> UniqueElt;
11392 for (
int Elt : Mask) {
11397 if (UniqueElt.has_value() && UniqueElt.value() != Elt)
11403 return NumUndefs <= Mask.size() / 2 && UniqueElt.has_value();
11416 int NumElts = Mask.size();
11418 int NumEltsPerLane = NumElts / NumLanes;
11422 bool IsAlternating =
true;
11423 bool V1Zero =
true, V2Zero =
true;
11427 for (
int i = 0; i < NumElts; ++i) {
11429 if (M >= 0 && M < NumElts) {
11432 V1Zero &= Zeroable[i];
11433 IsAlternating &= (i & 1) == 0;
11434 }
else if (M >= NumElts) {
11435 V2Mask[i] = M - NumElts;
11436 FinalMask[i] = i + NumElts;
11437 V2Zero &= Zeroable[i];
11438 IsAlternating &= (i & 1) == 1;
11445 auto canonicalizeBroadcastableInput = [
DL, VT, &Subtarget,
11448 unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
11449 if (!Subtarget.
hasAVX2() && (!Subtarget.
hasAVX() || EltSizeInBits < 32 ||
11455 "Expected to demand only the 0'th element.");
11458 int &InputMaskElt =
I.value();
11459 if (InputMaskElt >= 0)
11460 InputMaskElt =
I.index();
11470 canonicalizeBroadcastableInput(V1, V1Mask);
11471 canonicalizeBroadcastableInput(V2, V2Mask);
11496 DL, VT, V1, V2, Mask, Subtarget, DAG))
11504 DL, VT, V1, V2, Mask, Subtarget, DAG))
11513 V1Mask.
assign(NumElts, -1);
11514 V2Mask.
assign(NumElts, -1);
11515 FinalMask.
assign(NumElts, -1);
11516 for (
int i = 0; i != NumElts; i += NumEltsPerLane)
11517 for (
int j = 0; j != NumEltsPerLane; ++j) {
11518 int M = Mask[i + j];
11519 if (M >= 0 && M < NumElts) {
11520 V1Mask[i + (j / 2)] = M;
11521 FinalMask[i + j] = i + (j / 2);
11522 }
else if (M >= NumElts) {
11523 V2Mask[i + (j / 2)] = M - NumElts;
11524 FinalMask[i + j] = i + (j / 2) + NumElts;
11538 assert(EltSizeInBits < 64 &&
"Can't rotate 64-bit integers");
11541 int MinSubElts = Subtarget.
hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
11542 int MaxSubElts = 64 / EltSizeInBits;
11543 unsigned RotateAmt, NumSubElts;
11545 MaxSubElts, NumSubElts, RotateAmt))
11547 unsigned NumElts = Mask.size();
11562 if (!IsLegal && Subtarget.
hasSSE3())
11575 if ((RotateAmt % 16) == 0)
11578 unsigned ShlAmt = RotateAmt;
11600 int NumElts = Mask.size();
11611 for (
int i = 0; i < NumElts; ++i) {
11614 "Unexpected mask index.");
11619 int StartIdx = i - (M % NumElts);
11627 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
11630 Rotation = CandidateRotation;
11631 else if (Rotation != CandidateRotation)
11636 SDValue MaskV = M < NumElts ? V1 : V2;
11647 else if (TargetV != MaskV)
11654 assert(Rotation != 0 && "Failed to locate a viable rotation!");
11655 assert((Lo || Hi) && "Failed to find a rotated input vector!");
11700 int NumElts = RepeatedMask.size();
11701 int Scale = 16 / NumElts;
11702 return Rotation * Scale;
11713 if (ByteRotation <= 0)
11725 "512-bit PALIGNR requires BWI instructions");
11732 "Rotate-based lowering only supports 128-bit lowering!");
11733 assert(Mask.size() <= 16 &&
11734 "Can shuffle at most 16 bytes in a 128-bit vector!");
11735 assert(ByteVT == MVT::v16i8 &&
11736 "SSE2 rotate lowering only needed for v16i8!");
11739 int LoByteShift = 16 - ByteRotation;
11740 int HiByteShift = ByteRotation;
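// [Editor's illustrative sketch -- not part of the original file.] On SSE2,
// where PALIGNR is unavailable, the byte rotation above is emulated by
// shifting one input right by ByteRotation bytes (PSRLDQ), the other left by
// 16 - ByteRotation bytes (PSLLDQ), and OR-ing the results. Written out on
// plain byte arrays, mirroring the Lo/Hi values in the code above
// (hypothetical helper, one 16-byte register):
static void rotateBytesSSE2Style(const unsigned char Lo[16],
                                 const unsigned char Hi[16], int ByteRotation,
                                 unsigned char Out[16]) {
  for (int i = 0; i != 16; ++i) {
    unsigned char FromHi = (i + ByteRotation < 16) ? Hi[i + ByteRotation] : 0;
    unsigned char FromLo =
        (i >= 16 - ByteRotation) ? Lo[i - (16 - ByteRotation)] : 0;
    Out[i] = FromHi | FromLo; // at most one of the two is non-zero per byte
  }
}
// [End of editor's sketch.]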
11764 const APInt &Zeroable,
11768 "Only 32-bit and 64-bit elements are supported!");
11772 &&
"VLX required for 128/256-bit vectors");
11784 unsigned NumElts = Mask.size();
11787 assert((ZeroLo + ZeroHi) < NumElts &&
"Zeroable shuffle detected");
11788 if (!ZeroLo && !ZeroHi)
11792 SDValue Src = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11793 int Low = Mask[ZeroLo] < (int)NumElts ? 0 : NumElts;
11801 SDValue Src = Mask[0] < (int)NumElts ? V1 : V2;
11802 int Low = Mask[0] < (int)NumElts ? 0 : NumElts;
11815 const APInt &Zeroable,
11825 if (!ZeroLo && !ZeroHi)
11828 unsigned NumElts = Mask.size();
11829 unsigned Len = NumElts - (ZeroLo + ZeroHi);
11839 SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11848 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
11853 }
else if (ZeroHi == 0) {
11854 unsigned Shift = Mask[ZeroLo] % NumElts;
11859 }
else if (!Subtarget.
hasSSSE3()) {
11863 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
11866 Shift += Mask[ZeroLo] % NumElts;
11902 int MaskOffset, const APInt &Zeroable,
11904 int Size = Mask.size();
11905 unsigned SizeInBits = Size * ScalarSizeInBits;
11907 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
11908 for (int i = 0; i < Size; i += Scale)
11909 for (int j = 0; j < Shift; ++j)
11910 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
11916 auto MatchShift = [&](int Shift, int Scale, bool Left) {
11917 for (int i = 0; i != Size; i += Scale) {
11918 unsigned Pos = Left ? i + Shift : i;
11919 unsigned Low = Left ? i : i + Shift;
11920 unsigned Len = Scale - Shift;
11925 int ShiftEltBits = ScalarSizeInBits * Scale;
11926 bool ByteShift = ShiftEltBits > 64;
11929 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
11933 Scale = ByteShift ? Scale / 2 : Scale;
11939 return (int)ShiftAmt;
11948 unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
11949 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
11950 for (int Shift = 1; Shift != Scale; ++Shift)
11951 for (bool Left : {true, false})
11952 if (CheckZeros(Shift, Scale, Left)) {
11953 int ShiftAmt = MatchShift(Shift, Scale, Left);
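// [Editor's illustrative sketch -- not part of the original file.] The
// Shift/Scale search above recognizes masks where each Scale-element group is
// the previous contents shifted by Shift positions with zeros shifted in,
// which is what the PSLLDQ/PSRLDQ-style shifts produce. For one group, the
// expected left-shift pattern looks like (hypothetical helper; GroupBase is
// the index of the group's first source element):
static void expectedLeftShiftGroupMask(int Scale, int Shift, int GroupBase,
                                       int *GroupMask, bool *MustBeZero) {
  for (int j = 0; j != Scale; ++j) {
    MustBeZero[j] = j < Shift;               // zeros shifted in at the low end
    GroupMask[j] = j < Shift ? -1            // placeholder for a zeroed slot
                             : GroupBase + (j - Shift);
  }
}
// [End of editor's sketch.]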
11964 const APInt &Zeroable,
11967 int Size = Mask.size();
11976 Mask, 0, Zeroable, Subtarget);
11979 if (ShiftAmt < 0) {
11981 Mask,
Size, Zeroable, Subtarget);
11992 "Illegal integer vector type");
11994 V = DAG.
getNode(Opcode,
DL, ShiftVT, V,
12004 int Size = Mask.size();
12005 int HalfSize =
Size / 2;
12015 int Len = HalfSize;
12016 for (; Len > 0; --Len)
12017 if (!Zeroable[Len - 1])
12019 assert(Len > 0 &&
"Zeroable shuffle mask");
12024 for (
int i = 0; i != Len; ++i) {
12033 if (i > M || M >= HalfSize)
12036 if (
Idx < 0 || (Src == V &&
Idx == (M - i))) {
12044 if (!Src ||
Idx < 0)
12047 assert((
Idx + Len) <= HalfSize &&
"Illegal extraction mask");
12060 int Size = Mask.size();
12061 int HalfSize =
Size / 2;
12068 for (
int Idx = 0;
Idx != HalfSize; ++
Idx) {
12084 for (
int Hi =
Idx + 1;
Hi <= HalfSize; ++
Hi) {
12086 int Len =
Hi -
Idx;
12100 }
else if ((!
Base || (
Base == V1)) &&
12103 }
else if ((!
Base || (
Base == V2)) &&
12153 assert(Scale > 1 && "Need a scale to extend.");
12156 int NumEltsPerLane = 128 / EltBits;
12157 int OffsetLane = Offset / NumEltsPerLane;
12158 assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Only 8, 16, and 32 bit elements can be extended.");
12160 assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
12161 assert(0 <= Offset && "Extension offset must be positive.");
12163 "Extension offset must be in the first lane or start an upper lane.");
12166 auto SafeOffset = [&](int Idx) {
12167 return OffsetLane == (Idx / NumEltsPerLane);
12171 auto ShuffleOffset = [&](SDValue V) {
12176 for (int i = 0; i * Scale < NumElements; ++i) {
12177 int SrcIdx = i + Offset;
12178 ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
12191 NumElements / Scale);
12193 InputV = ShuffleOffset(InputV);
12195 DL, ExtVT, InputV, DAG);
12204 if (AnyExt && EltBits == 32) {
12212 if (AnyExt && EltBits == 16 && Scale > 2) {
12213 int PSHUFDMask[4] = {Offset / 2, -1,
12218 int PSHUFWMask[4] = {1, -1, -1, -1};
12221 VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
12228 if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
12229 assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
12232 int LoIdx = Offset * EltBits;
12241 int HiIdx = (Offset + 1) * EltBits;
12253 if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
12254 assert(NumElements == 16 && "Unexpected byte vector width!");
12256 for (int i = 0; i < 16; ++i) {
12258 if ((i % Scale == 0 && SafeOffset(Idx))) {
12265 InputV = DAG.getBitcast(MVT::v16i8, InputV);
12273 int AlignToUnpack = Offset % (NumElements / Scale);
12274 if (AlignToUnpack) {
12276 for (int i = AlignToUnpack; i < NumElements; ++i)
12277 ShMask[i - AlignToUnpack] = i;
12279 Offset -= AlignToUnpack;
12285 if (Offset >= (NumElements / 2)) {
12287 Offset -= (NumElements / 2);
12294 InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
12298 } while (Scale > 1);
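// [Editor's illustrative sketch -- not part of the original file.] The loop
// ending here widens elements stage by stage by unpacking the input with a
// zero (or undef, for an any-extend) vector, doubling the element width each
// iteration until the requested Scale is reached. One zero-extension stage of
// 8-bit elements to 16 bits, written on plain arrays (hypothetical helper for
// the low half of a 16-byte register):
static void zextStageViaUnpack(const unsigned char In[16],
                               unsigned short Out[8]) {
  // punpcklbw with a zero register interleaves each low byte with 0x00, which
  // on little-endian x86 reads back as the zero-extended 16-bit value.
  for (int i = 0; i != 8; ++i)
    Out[i] = (unsigned short)In[i];
}
// [End of editor's sketch.]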
12319 int NumLanes = Bits / 128;
12321 int NumEltsPerLane = NumElements / NumLanes;
12323 "Exceeds 32-bit integer zero extension limit");
12324 assert((
int)Mask.size() == NumElements &&
"Unexpected shuffle mask size");
12330 bool AnyExt =
true;
12333 for (
int i = 0; i < NumElements; ++i) {
12337 if (i % Scale != 0) {
12349 SDValue V = M < NumElements ? V1 : V2;
12350 M = M % NumElements;
12353 Offset = M - (i / Scale);
12354 }
else if (InputV != V)
12361 (
Offset % NumEltsPerLane) == 0))
12366 if (
Offset && (
Offset / NumEltsPerLane) != (M / NumEltsPerLane))
12369 if ((M % NumElements) != (
Offset + (i / Scale)))
12382 if (
Offset != 0 && Matches < 2)
12386 InputV, Mask, Subtarget, DAG);
12390 assert(Bits % 64 == 0 &&
12391 "The number of bits in a vector must be divisible by 64 on x86!");
12392 int NumExtElements = Bits / 64;
12396 for (; NumExtElements < NumElements; NumExtElements *= 2) {
12397 assert(NumElements % NumExtElements == 0 &&
12398 "The input vector size must be divisible by the extended size.");
12409 auto CanZExtLowHalf = [&]() {
12410 for (
int i = NumElements / 2; i != NumElements; ++i)
12420 if (
SDValue V = CanZExtLowHalf()) {
12435 MVT VT = V.getSimpleValueType();
12441 MVT NewVT = V.getSimpleValueType();
12462 return V->hasOneUse() &&
12466template<
typename T>
12468 T EltVT = VT.getScalarType();
12469 return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
12470 (EltVT == MVT::f16 && !Subtarget.hasFP16());
12490 find_if(Mask, [&Mask](
int M) {
return M >= (int)Mask.size(); }) -
12493 bool IsV1Zeroable =
true;
12494 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i)
12495 if (i != V2Index && !Zeroable[i]) {
12496 IsV1Zeroable =
false;
12501 if (!IsV1Zeroable) {
12503 V1Mask[V2Index] = -1;
12518 if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
12522 if (!IsV1Zeroable && !(IsV1Constant && V2Index == 0))
12531 if (!IsV1Zeroable) {
12542 }
else if (Mask[V2Index] != (
int)Mask.size() || EltVT == MVT::i8 ||
12543 (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
12549 if (!IsV1Zeroable) {
12552 assert(VT == ExtVT &&
"Cannot change extended type when non-zeroable!");
12559 unsigned MovOpc = 0;
12560 if (EltVT == MVT::f16)
12562 else if (EltVT == MVT::f32)
12564 else if (EltVT == MVT::f64)
12568 return DAG.
getNode(MovOpc,
DL, ExtVT, V1, V2);
12579 if (V2Index != 0) {
12586 V2Shuffle[V2Index] = 0;
12608 "We can only lower integer broadcasts with AVX2!");
12614 assert(V0VT.
isVector() &&
"Unexpected non-vector vector-sized value!");
12624 if (V0EltSize <= EltSize)
12627 assert(((V0EltSize % EltSize) == 0) &&
12628 "Scalar type sizes must all be powers of 2 on x86!");
12631 const unsigned Scale = V0EltSize / EltSize;
12632 const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
12644 if (
const int OffsetIdx = BroadcastIdx % Scale)
12658 assert(Mask.size() == 4 && "Unsupported mask size!");
12659 assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
12660 assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
12661 assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
12662 assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
12666 if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
12668 if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
12680 assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
12681 int Size = Mask.size();
12682 for (int i = 0; i < Size; ++i)
12683 if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i)
12698 "VPERM* family of shuffles requires 32-bit or 64-bit elements");
12718 if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
12720 else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
12726 if (NumElts == 4 &&
12731 NewMask.
append(NumElts, -1);
12751 if (!((Subtarget.
hasSSE3() && VT == MVT::v2f64) ||
12752 (Subtarget.
hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
12759 unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.
hasAVX2())
12766 if (BroadcastIdx < 0)
12768 assert(BroadcastIdx < (
int)Mask.size() &&
"We only expect to be called with "
12769 "a sorted mask where the broadcast "
12771 int NumActiveElts =
count_if(Mask, [](
int M) {
return M >= 0; });
12777 int BitOffset = BroadcastIdx * NumEltBits;
12780 switch (V.getOpcode()) {
12782 V = V.getOperand(0);
12786 int OpBitWidth = V.getOperand(0).getValueSizeInBits();
12787 int OpIdx = BitOffset / OpBitWidth;
12788 V = V.getOperand(OpIdx);
12789 BitOffset %= OpBitWidth;
12794 unsigned EltBitWidth = V.getScalarValueSizeInBits();
12795 unsigned Idx = V.getConstantOperandVal(1);
12796 unsigned BeginOffset =
Idx * EltBitWidth;
12797 BitOffset += BeginOffset;
12798 V = V.getOperand(0);
12802 SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
12804 int Idx = (int)V.getConstantOperandVal(2);
12805 int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
12806 int BeginOffset =
Idx * EltBitWidth;
12807 int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
12808 if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
12809 BitOffset -= BeginOffset;
12819 assert((BitOffset % NumEltBits) == 0 &&
"Illegal bit-offset");
12820 BroadcastIdx = BitOffset / NumEltBits;
12823 bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;
12832 DL, VT, V, BroadcastIdx, Subtarget, DAG))
12833 return TruncBroadcast;
12839 V = V.getOperand(BroadcastIdx);
12845 cast<LoadSDNode>(V)->isSimple()) {
12855 assert((
int)(
Offset * 8) == BitOffset &&
"Unexpected bit-offset");
12872 assert(SVT == MVT::f64 &&
"Unexpected VT!");
12877 }
else if (!BroadcastFromReg) {
12880 }
else if (BitOffset != 0) {
12888 if (VT == MVT::v4f64 || VT == MVT::v4i64)
12893 if (BitOffset < 128 && NumActiveElts > 1 &&
12894 V.getScalarValueSizeInBits() == NumEltBits) {
12895 assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
12896 "Unexpected bit-offset");
12898 ExtractMask[0] = BitOffset / V.getScalarValueSizeInBits();
12903 if ((BitOffset % 128) != 0)
12906 assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
12907 "Unexpected bit-offset");
12908 assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
12909 "Unexpected vector size");
12910 unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
12918 if (Subtarget.
hasAVX()) {
12926 if (!V.getValueType().isVector()) {
12927 assert(V.getScalarValueSizeInBits() == NumEltBits &&
12928 "Unexpected scalar size");
12937 if (V.getValueSizeInBits() > 128)
12942 unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
12954 unsigned &InsertPSMask,
12955 const APInt &Zeroable,
12958 assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
12959 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
12966 unsigned ZMask = 0;
12967 int VADstIndex = -1;
12968 int VBDstIndex = -1;
12969 bool VAUsedInPlace = false;
12971 for (int i = 0; i < 4; ++i) {
12979 if (i == CandidateMask[i]) {
12980 VAUsedInPlace = true;
12985 if (VADstIndex >= 0 || VBDstIndex >= 0)
12988 if (CandidateMask[i] < 4) {
12998 if (VADstIndex < 0 && VBDstIndex < 0)
13003 unsigned VBSrcIndex = 0;
13004 if (VADstIndex >= 0) {
13007 VBSrcIndex = CandidateMask[VADstIndex];
13008 VBDstIndex = VADstIndex;
13011 VBSrcIndex = CandidateMask[VBDstIndex] - 4;
13016 if (!VAUsedInPlace)
13024 InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
13025 assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
13029 if (matchAsInsertPS(V1, V2, Mask))
13035 if (matchAsInsertPS(V2, V1, CommutedMask))
13045 assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
13048 unsigned InsertPSMask = 0;
13069 assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
13070 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13072 if (V2.isUndef()) {
13075 Mask, Subtarget, DAG))
13080 unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
13082 if (Subtarget.hasAVX()) {
13095 assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
13096 assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
13097 assert(Mask[0] < 2 && "We sort V1 to be the first input.");
13098 assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
13107 DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
13111 int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
13112 Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
13114 DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
13130 Zeroable, Subtarget, DAG))
13137 unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
13153 assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
13154 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13156 if (V2.isUndef()) {
13159 Mask, Subtarget, DAG))
13166 int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
13167 Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
13168 Mask[1] < 0 ? -1 : (Mask[1] * 2),
13169 Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
13175 assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
13176 assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
13177 assert(Mask[0] < 2 && "We sort V1 to be the first input.");
13178 assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
13193 DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
13197 int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
13199 DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
13204 bool IsBlendSupported = Subtarget.hasSSE41();
13205 if (IsBlendSupported)
13207 Zeroable, Subtarget, DAG))
13217 if (Subtarget.hasVLX())
13219 Zeroable, Subtarget, DAG))
13229 if (IsBlendSupported)
13231 Zeroable, Subtarget, DAG);
13251 SDValue LowV = V1, HighV = V2;
13253 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13255 if (NumV2Elements == 1) {
13256 int V2Index =
find_if(Mask, [](
int M) {
return M >= 4; }) - Mask.begin();
13260 int V2AdjIndex = V2Index ^ 1;
13262 if (Mask[V2AdjIndex] < 0) {
13268 NewMask[V2Index] -= 4;
13272 int V1Index = V2AdjIndex;
13273 int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
13285 NewMask[V1Index] = 2;
13286 NewMask[V2Index] = 0;
13288 }
else if (NumV2Elements == 2) {
13289 if (Mask[0] < 4 && Mask[1] < 4) {
13294 }
else if (Mask[2] < 4 && Mask[3] < 4) {
13309 int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
13310 Mask[2] < 4 ? Mask[2] : Mask[3],
13311 (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
13312 (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
13319 NewMask[0] = Mask[0] < 4 ? 0 : 2;
13320 NewMask[1] = Mask[0] < 4 ? 2 : 0;
13321 NewMask[2] = Mask[2] < 4 ? 1 : 3;
13322 NewMask[3] = Mask[2] < 4 ? 3 : 1;
13324 }
else if (NumV2Elements == 3) {
13345 assert(V2.getSimpleValueType() == MVT::v4f32 &&
"Bad operand type!");
13346 assert(Mask.size() == 4 &&
"Unexpected mask size for v4 shuffle!");
13350 Zeroable, Subtarget, DAG))
13353 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13355 if (NumV2Elements == 0) {
13358 Mask, Subtarget, DAG))
13369 if (Subtarget.
hasAVX()) {
13393 DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) {
13407 if (NumV2Elements == 1 && Mask[0] >= 4)
13409 DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
13449 assert(V2.getSimpleValueType() == MVT::v4i32 &&
"Bad operand type!");
13450 assert(Mask.size() == 4 &&
"Unexpected mask size for v4 shuffle!");
13456 Zeroable, Subtarget, DAG))
13459 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13462 if (Subtarget.preferLowerShuffleAsShift()) {
13465 Subtarget, DAG,
true))
13467 if (NumV2Elements == 0)
13473 if (NumV2Elements == 0) {
13475 if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
13477 Mask, Subtarget, DAG))
13486 const int UnpackLoMask[] = {0, 0, 1, 1};
13487 const int UnpackHiMask[] = {2, 2, 3, 3};
13489 Mask = UnpackLoMask;
13491 Mask = UnpackHiMask;
13508 if (NumV2Elements == 1)
13510 DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
13515 bool IsBlendSupported = Subtarget.hasSSE41();
13516 if (IsBlendSupported)
13518 Zeroable, Subtarget, DAG))
13522 Zeroable, Subtarget, DAG))
13532 if (Subtarget.hasVLX())
13533 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
13534 Zeroable, Subtarget, DAG))
13537 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
13548 if (IsBlendSupported)
13550 Zeroable, Subtarget, DAG);
13554 Mask, Subtarget, DAG))
13591 assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
13603 for (int i = 0; i != 4; ++i)
13604 HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
13610 copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
13614 copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
13618 int NumHToL = LoInputs.size() - NumLToL;
13620 int NumHToH = HiInputs.size() - NumLToH;
13639 if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
13640 int PSHUFDMask[4] = { -1, -1, -1, -1 };
13642 int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
13645 for (int DWord = 0; DWord != 4; ++DWord) {
13646 int M0 = Mask[2 * DWord + 0];
13647 int M1 = Mask[2 * DWord + 1];
13650 if (M0 < 0 && M1 < 0)
13653 bool Match = false;
13654 for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
13655 auto &DWordPair = DWordPairs[j];
13658 DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
13659 DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
13660 PSHUFDMask[DWord] = DOffset + j;
13666 PSHUFDMask[DWord] = DOffset + DWordPairs.size();
13671 if (DWordPairs.size() <= 2) {
13672 DWordPairs.resize(2, std::make_pair(-1, -1));
13673 int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
13674 DWordPairs[1].first, DWordPairs[1].second};
13675 if ((NumHToL + NumHToH) == 0)
13676 return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
13677 if ((NumLToL + NumLToH) == 0)
13678 return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
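// If at most two distinct dword pairs are referenced, one PSHUFLW/PSHUFHW forms the
// pairs within a half and a PSHUFD then places the resulting dwords.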
13714 int AOffset, int BOffset) {
13716 "Must call this with A having 3 or 1 inputs from the A half.");
13718 "Must call this with B having 1 or 3 inputs from the B half.");
13720 "Must call this with either 3:1 or 1:3 inputs (summing to 4).");
13722 bool ThreeAInputs = AToAInputs.size() == 3;
13728 int ADWord = 0, BDWord = 0;
13729 int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
13730 int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
13731 int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
13732 ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
13733 int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
13734 int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
13735 int TripleNonInputIdx =
13736 TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
13737 TripleDWord = TripleNonInputIdx / 2;
13741 OneInputDWord = (OneInput / 2) ^ 1;
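// Rebalancing sketch: the side with three inputs donates the dword holding its unused
// word, and the PSHUFD below swaps it with the dword holding the lone input, turning a
// 3:1 split into 2:2 so the half shuffles can finish the job.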
13748 if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
13753 int NumFlippedAToBInputs = llvm::count(AToBInputs, 2 * ADWord) +
13754 llvm::count(AToBInputs, 2 * ADWord + 1);
13755 int NumFlippedBToBInputs = llvm::count(BToBInputs, 2 * BDWord) +
13756 llvm::count(BToBInputs, 2 * BDWord + 1);
13757 if ((NumFlippedAToBInputs == 1 &&
13758 (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
13759 (NumFlippedBToBInputs == 1 &&
13760 (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
13765 auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
13766 ArrayRef<int> Inputs) {
13767 int FixIdx = PinnedIdx ^ 1;
13768 bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
13772 int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
13773 bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
13774 if (IsFixIdxInput == IsFixFreeIdxInput)
13777 assert(IsFixIdxInput != IsFixFreeIdxInput &&
13778 "We need to be changing the number of flipped inputs!");
13779 int PSHUFHalfMask[] = {0, 1, 2, 3};
13780 std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
13786 for (int &M : Mask)
13787 if (M >= 0 && M == FixIdx)
13789 else if (M >= 0 && M == FixFreeIdx)
13792 if (NumFlippedBToBInputs != 0) {
13793 int BPinnedIdx =
13794 BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
13795 FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
13797 assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
13798 int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
13799 FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
13804 int PSHUFDMask[] = {0, 1, 2, 3};
13805 PSHUFDMask[ADWord] = BDWord;
13806 PSHUFDMask[BDWord] = ADWord;
13813 for (int &M : Mask)
13814 if (M >= 0 && M/2 == ADWord)
13815 M = 2 * BDWord + M % 2;
13816 else if (M >= 0 && M/2 == BDWord)
13817 M = 2 * ADWord + M % 2;
13823 if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
13824 return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
13825 if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
13826 return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
13833 int PSHUFLMask[4] = {-1, -1, -1, -1};
13834 int PSHUFHMask[4] = {-1, -1, -1, -1};
13835 int PSHUFDMask[4] = {-1, -1, -1, -1};
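// General v8i16 strategy: compose up to three shuffles - PSHUFLW to fix the low half,
// PSHUFHW to fix the high half, and PSHUFD to move dwords across the halves.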
13840 auto fixInPlaceInputs =
13844 if (InPlaceInputs.empty())
13846 if (InPlaceInputs.size() == 1) {
13847 SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
13848 InPlaceInputs[0] - HalfOffset;
13849 PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
13852 if (IncomingInputs.empty()) {
13854 for (int Input : InPlaceInputs) {
13855 SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
13856 PSHUFDMask[Input / 2] = Input / 2;
13861 assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
13862 SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
13863 InPlaceInputs[0] - HalfOffset;
13866 int AdjIndex = InPlaceInputs[0] ^ 1;
13867 SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
13868 std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
13869 PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
13871 fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
13872 fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
13878 auto moveInputsToRightHalf = [&PSHUFDMask](
13883 auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
13884 return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
13886 auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
13887 int Word) {
13888 int LowWord = Word & ~1;
13889 int HighWord = Word | 1;
13890 return isWordClobbered(SourceHalfMask, LowWord) ||
13891 isWordClobbered(SourceHalfMask, HighWord);
13894 if (IncomingInputs.empty())
13897 if (ExistingInputs.empty()) {
13899 for (int Input : IncomingInputs) {
13902 if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
13903 if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
13904 SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
13905 Input - SourceOffset;
13907 for (int &M : HalfMask)
13908 if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
13910 else if (M == Input)
13911 M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
13913 assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
13914 Input - SourceOffset &&
13915 "Previous placement doesn't match!");
13920 Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
13924 if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
13925 PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
13927 assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
13929 "Previous placement doesn't match!");
13935 for (int &M : HalfMask)
13936 if (M >= SourceOffset && M < SourceOffset + 4) {
13937 M = M - SourceOffset + DestOffset;
13938 assert(M >= 0 && "This should never wrap below zero!");
13946 if (IncomingInputs.size() == 1) {
13947 if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
13948 int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
13949 SourceOffset;
13950 SourceHalfMask[InputFixed - SourceOffset] =
13951 IncomingInputs[0] - SourceOffset;
13952 std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
13953 InputFixed);
13954 IncomingInputs[0] = InputFixed;
13956 } else if (IncomingInputs.size() == 2) {
13957 if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
13958 isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
13962 int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
13963 IncomingInputs[1] - SourceOffset};
13968 if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
13969 SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
13970 SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
13971 SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
13972 InputsFixed[1] = InputsFixed[0] ^ 1;
13973 } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
13974 SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
13975 SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
13976 SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
13977 InputsFixed[0] = InputsFixed[1] ^ 1;
13978 } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
13979 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
13983 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
13984 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
13985 InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
13986 InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
13992 for (int i = 0; i < 4; ++i)
13993 assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
13994 "We can't handle any clobbers here!");
13995 assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
13996 "Cannot have adjacent inputs here!");
13998 SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
13999 SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
14003 for (int &M : FinalSourceHalfMask)
14004 if (M == (InputsFixed[0] ^ 1) + SourceOffset)
14005 M = InputsFixed[1] + SourceOffset;
14006 else if (M == InputsFixed[1] + SourceOffset)
14007 M = (InputsFixed[0] ^ 1) + SourceOffset;
14009 InputsFixed[1] = InputsFixed[0] ^ 1;
14013 for (int &M : HalfMask)
14014 if (M == IncomingInputs[0])
14015 M = InputsFixed[0] + SourceOffset;
14016 else if (M == IncomingInputs[1])
14017 M = InputsFixed[1] + SourceOffset;
14019 IncomingInputs[0] = InputsFixed[0] + SourceOffset;
14020 IncomingInputs[1] = InputsFixed[1] + SourceOffset;
14027 int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
14028 assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
14029 PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
14030 for (int &M : HalfMask)
14031 for (int Input : IncomingInputs)
14033 M = FreeDWord * 2 + Input % 2;
14035 moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
14037 moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
14057 "Failed to lift all the high half inputs to the low mask!");
14058 assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
14059 "Failed to lift all the low half inputs to the high mask!");
14067 for (int &M : HiMask)
14083 "Lane crossing shuffle masks not supported");
14086 int Size = Mask.size();
14087 int Scale = NumBytes / Size;
14094 for (int i = 0; i < NumBytes; ++i) {
14095 int M = Mask[i / Scale];
14099 const int ZeroMask = 0x80;
14100 int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
14101 int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
14102 if (Zeroable[i / Scale])
14103 V1Idx = V2Idx = ZeroMask;
14107 V1InUse |= (ZeroMask != V1Idx);
14108 V2InUse |= (ZeroMask != V2Idx);
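// PSHUFB semantics: each result byte is selected by a per-byte index, and an index
// with bit 7 set (0x80) produces zero. V1InUse/V2InUse let the caller emit a single
// PSHUFB when only one source is referenced, or OR two PSHUFBs together otherwise.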
14121 if (V1InUse && V2InUse)
14124 V = V1InUse ? V1 : V2;
14147 assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
14148 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14153 Zeroable, Subtarget, DAG))
14161 int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
14163 if (NumV2Inputs == 0) {
14167 Subtarget, DAG, false))
14172 Mask, Subtarget, DAG))
14201 "All single-input shuffles should be canonicalized to be V1-input "
14211 if (Subtarget.hasSSE4A())
14217 if (NumV2Inputs == 1)
14219 DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
14224 bool IsBlendSupported = Subtarget.hasSSE41();
14225 if (IsBlendSupported)
14227 Zeroable, Subtarget, DAG))
14231 Zeroable, Subtarget, DAG))
14259 Zeroable, Subtarget, DAG))
14264 if ((NumEvenDrops == 1 || (NumEvenDrops == 2 && Subtarget.hasSSE41())) &&
14265 !Subtarget.hasVLX()) {
14267 unsigned PackOpc = 0;
14268 if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
14279 } else if (Subtarget.hasSSE41()) {
14282 for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
14291 } else if (!Subtarget.hasSSSE3()) {
14304 if (NumEvenDrops == 2) {
14305 Result = DAG.getBitcast(MVT::v4i32, Result);
14306 Result = DAG.getNode(PackOpc, DL, MVT::v8i16, Result, Result);
14314 if (NumOddDrops == 1) {
14315 bool HasSSE41 = Subtarget.hasSSE41();
14323 MVT::v8i16, V1, V2);
14328 Mask, Subtarget, DAG))
14333 if (!IsBlendSupported && Subtarget.hasSSSE3()) {
14334 bool V1InUse, V2InUse;
14336 Zeroable, DAG, V1InUse, V2InUse);
14342 Zeroable, Subtarget, DAG);
14351 assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
14352 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14353 int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
14355 if (Subtarget.hasFP16()) {
14356 if (NumV2Elements == 0) {
14359 Mask, Subtarget, DAG))
14362 if (NumV2Elements == 1 && Mask[0] >= 8)
14364 DL, MVT::v8f16, V1, V2, Mask, Zeroable, Subtarget, DAG))
14391 MVT ShuffleVT = VT;
14401 for (int &M : AdjustedMask)
14403 M += (Scale - 1) * NumElts;
14416 if (VT != ShuffleVT)
14434 assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
14435 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
14455 Zeroable, Subtarget, DAG))
14468 if (Subtarget.hasSSE4A())
14473 int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
14476 if (NumV2Elements == 0) {
14479 Mask, Subtarget, DAG))
14499 for (int i = 0; i < 16; i += 2)
14500 if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
14505 auto tryToWidenViaDuplication = [&]() -> SDValue {
14506 if (!canWidenViaDuplication(Mask))
14509 copy_if(Mask, std::back_inserter(LoInputs),
14510 [](int M) { return M >= 0 && M < 8; });
14514 copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
14518 bool TargetLo = LoInputs.size() >= HiInputs.size();
14519 ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
14520 ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
14522 int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
14524 for (int I : InPlaceInputs) {
14525 PreDupI16Shuffle[I / 2] = I / 2;
14528 int j = TargetLo ? 0 : 4, je = j + 4;
14529 for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
14532 if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
14535 while (j < je && PreDupI16Shuffle[j] >= 0)
14543 PreDupI16Shuffle[j] = MovingInputs[i] / 2;
14547 LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
14552 DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
14555 bool EvenInUse = false, OddInUse = false;
14556 for (int i = 0; i < 16; i += 2) {
14557 EvenInUse |= (Mask[i + 0] >= 0);
14558 OddInUse |= (Mask[i + 1] >= 0);
14559 if (EvenInUse && OddInUse)
14563 MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
14564 OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));
14566 int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
14567 for (int i = 0; i < 16; ++i)
14568 if (Mask[i] >= 0) {
14569 int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
14570 assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
14571 if (PostDupI16Shuffle[i / 2] < 0)
14572 PostDupI16Shuffle[i / 2] = MappedMask;
14574 assert(PostDupI16Shuffle[i / 2] == MappedMask &&
14575 "Conflicting entries in the original shuffle!");
14580 DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
14582 if (SDValue V = tryToWidenViaDuplication())
14587 Zeroable, Subtarget, DAG))
14596 Zeroable, Subtarget, DAG))
14600 bool IsSingleInput = V2.isUndef();
14619 if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
14620 bool V1InUse = false;
14621 bool V2InUse = false;
14624 DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
14629 if (V1InUse && V2InUse) {
14632 Zeroable, Subtarget, DAG))
14644 DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
14648 if (Subtarget.hasVBMI())
14653 if (Subtarget.hasXOP()) {
14661 DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
14669 if (NumV2Elements == 1)
14671 DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
14684 if (NumEvenDrops) {
14690 assert(NumEvenDrops <= 3 &&
14691 "No support for dropping even elements more than 3 times.");
14693 for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
14698 if (!IsSingleInput)
14704 IsSingleInput ? V1 : V2);
14705 for (int i = 1; i < NumEvenDrops; ++i) {
14706 Result = DAG.getBitcast(MVT::v8i16, Result);
14713 if (NumOddDrops == 1) {
14717 if (!IsSingleInput)
14722 IsSingleInput ? V1 : V2);
14726 if (NumV2Elements > 0)
14728 Zeroable, Subtarget, DAG);
14735 std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
14736 std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
14737 for (int i = 0; i < 16; ++i)
14739 (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
14745 if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
14746 none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
14753 VHiHalf = DAG.getUNDEF(MVT::v8i16);
14756 for (int &M : LoBlendMask)
14759 for (int &M : HiBlendMask)
14785 const APInt &Zeroable,
14788 if (VT == MVT::v8bf16) {
14825 "Only for 256-bit or wider vector shuffles!");
14827 assert(V2.getSimpleValueType() == VT &&
"Bad operand type!");
14833 int SplitNumElements = NumElements / 2;
14839 auto SplitVector = [&](SDValue V) {
14842 return std::make_pair(DAG.getBitcast(SplitVT, LoV),
14843 DAG.getBitcast(SplitVT, HiV));
14846 SDValue LoV1, HiV1, LoV2, HiV2;
14847 std::tie(LoV1, HiV1) = SplitVector(V1);
14848 std::tie(LoV2, HiV2) = SplitVector(V2);
14851 auto GetHalfBlendPiecesReq = [&](const ArrayRef<int> &HalfMask, bool &UseLoV1,
14852 bool &UseHiV1, bool &UseLoV2,
14853 bool &UseHiV2) {
14854 UseLoV1 = UseHiV1 = UseLoV2 = UseHiV2 = false;
14855 for (int i = 0; i < SplitNumElements; ++i) {
14856 int M = HalfMask[i];
14857 if (M >= NumElements) {
14858 if (M >= NumElements + SplitNumElements)
14862 } else if (M >= 0) {
14863 if (M >= SplitNumElements)
14871 auto CheckHalfBlendUsable = [&](const ArrayRef<int> &HalfMask) -> bool {
14875 bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
14876 GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
14878 return !(UseHiV1 || UseHiV2);
14885 for (int i = 0; i < SplitNumElements; ++i) {
14886 int M = HalfMask[i];
14887 if (M >= NumElements) {
14888 V2BlendMask[i] = M - NumElements;
14889 BlendMask[i] = SplitNumElements + i;
14890 } else if (M >= 0) {
14891 V1BlendMask[i] = M;
14896 bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
14897 GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
14902 assert((!SimpleOnly || (!UseHiV1 && !UseHiV2)) && "Shuffle isn't simple");
14905 if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
14907 if (!UseLoV2 && !UseHiV2)
14909 if (!UseLoV1 && !UseHiV1)
14913 if (UseLoV1 && UseHiV1) {
14917 V1Blend = UseLoV1 ? LoV1 : HiV1;
14918 for (int i = 0; i < SplitNumElements; ++i)
14919 if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
14920 BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
14922 if (UseLoV2 && UseHiV2) {
14926 V2Blend = UseLoV2 ? LoV2 : HiV2;
14927 for (int i = 0; i < SplitNumElements; ++i)
14928 if (BlendMask[i] >= SplitNumElements)
14929 BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
14934 if (!CheckHalfBlendUsable(LoMask) || !CheckHalfBlendUsable(HiMask))
14952 const APInt &Zeroable,
14955 assert(!V2.isUndef() && "This routine must not be used to lower single-input "
14956 "shuffles as it could then recurse on itself.");
14957 int Size = Mask.size();
14962 auto DoBothBroadcast = [&] {
14963 int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
14966 if (V2BroadcastIdx < 0)
14967 V2BroadcastIdx = M - Size;
14968 else if (M - Size != V2BroadcastIdx)
14970 } else if (M >= 0) {
14971 if (V1BroadcastIdx < 0)
14972 V1BroadcastIdx = M;
14973 else if (M != V1BroadcastIdx)
14978 if (DoBothBroadcast())
14986 int LaneSize = Size / LaneCount;
14988 LaneInputs[0].resize(LaneCount, false);
14989 LaneInputs[1].resize(LaneCount, false);
14990 for (int i = 0; i < Size; ++i)
14992 LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
14993 if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
15009 assert(VT == MVT::v4f64 && "Only for v4f64 shuffles");
15011 int LHSMask[4] = {-1, -1, -1, -1};
15012 int RHSMask[4] = {-1, -1, -1, -1};
15013 int SHUFPDMask[4] = {-1, -1, -1, -1};
15017 for (int i = 0; i != 4; ++i) {
15021 int LaneBase = i & ~1;
15022 auto &LaneMask = (i & 1) ? RHSMask : LHSMask;
15023 LaneMask[LaneBase + (M & 1)] = M;
15024 SHUFPDMask[i] = M & 1;
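// For each result element this records which input it comes from (LHSMask/RHSMask
// feed the per-input lane permutes) and whether it is the low or high double of its
// 128-bit lane, which becomes that element's SHUFPD immediate bit.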
15046 int NumEltsPerLane = NumElts / NumLanes;
15047 bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();
15054 auto getSublanePermute = [&](int NumSublanes) -> SDValue {
15055 int NumSublanesPerLane = NumSublanes / NumLanes;
15056 int NumEltsPerSublane = NumElts / NumSublanes;
15064 for (int i = 0; i != NumElts; ++i) {
15069 int SrcSublane = M / NumEltsPerSublane;
15070 int DstLane = i / NumEltsPerLane;
15074 bool Found = false;
15075 int DstSubStart = DstLane * NumSublanesPerLane;
15076 int DstSubEnd = DstSubStart + NumSublanesPerLane;
15077 for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
15078 if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
15082 CrossLaneMaskLarge[DstSublane] = SrcSublane;
15083 int DstSublaneOffset = DstSublane * NumEltsPerSublane;
15084 InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
15085 DemandedCrossLane.setBit(InLaneMask[i]);
15095 if (!CanUseSublanes) {
15100 int NumIdentityLanes = 0;
15101 bool OnlyShuffleLowestLane = true;
15102 for (int i = 0; i != NumLanes; ++i) {
15103 int LaneOffset = i * NumEltsPerLane;
15105 i * NumEltsPerLane))
15106 NumIdentityLanes++;
15107 else if (CrossLaneMask[LaneOffset] != 0)
15108 OnlyShuffleLowestLane = false;
15110 if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
15117 if (CrossLaneMask == Mask || InLaneMask == Mask)
15122 for (int i = 0; i != NumElts; ++i)
15123 if (!DemandedCrossLane[i])
15132 if (SDValue V = getSublanePermute(NumLanes))
15136 if (!CanUseSublanes)
15140 if (SDValue V = getSublanePermute(NumLanes * 2))
15145 if (!Subtarget.hasFastVariableCrossLaneShuffle())
15148 return getSublanePermute(NumLanes * 4);
15154 int Size = Mask.size();
15155 InLaneMask.assign(Mask.begin(), Mask.end());
15156 for (int i = 0; i < Size; ++i) {
15157 int &M = InLaneMask[i];
15160 if (((M % Size) / LaneSize) != (i / LaneSize))
15161 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
15177 int Size = Mask.size();
15178 int LaneSize = Size / 2;
15183 if (VT == MVT::v4f64 &&
15184 !all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
15192 bool LaneCrossing[2] = {false, false};
15193 for (int i = 0; i < Size; ++i)
15194 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
15195 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
15196 AllLanes = LaneCrossing[0] && LaneCrossing[1];
15198 bool LaneUsed[2] = {false, false};
15199 for (int i = 0; i < Size; ++i)
15201 LaneUsed[(Mask[i] % Size) / LaneSize] = true;
15202 AllLanes = LaneUsed[0] && LaneUsed[1];
15207 "This last part of this routine only works on single input shuffles");
15213 "In-lane shuffle mask expected");
15233 const APInt &Zeroable,
15236 if (V2.isUndef()) {
15246 VT, MemVT, Ld, Ofs, DAG))
15261 bool IsLowZero = (Zeroable & 0x3) == 0x3;
15262 bool IsHighZero = (Zeroable & 0xc) == 0xc;
15265 if (WidenedMask[0] == 0 && IsHighZero) {
15285 if (!IsLowZero && !IsHighZero) {
15304 if (Subtarget.hasVLX()) {
15305 if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
15306 unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
15307 ((WidenedMask[1] % 2) << 1);
15327 assert((WidenedMask[0] >= 0 || IsLowZero) &&
15328 (WidenedMask[1] >= 0 || IsHighZero) &&
"Undef half?");
15330 unsigned PermMask = 0;
15331 PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
15332 PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
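// VPERM2X128 immediate: the low nibble selects the 128-bit source half for the low
// result lane, the high nibble for the high lane; bit 3 of a nibble (0x08/0x80)
// zeroes that lane instead of selecting a source.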
15335 if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
15337 if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
15354 assert(!V2.isUndef() &&
"This is only useful with multiple inputs.");
15359 int NumElts = Mask.size();
15367 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15368 int Srcs[2] = {-1, -1};
15370 for (int i = 0; i != NumLaneElts; ++i) {
15371 int M = Mask[(Lane * NumLaneElts) + i];
15378 int LaneSrc = M / NumLaneElts;
15380 if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
15382 else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
15387 Srcs[Src] = LaneSrc;
15388 InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
15395 LaneSrcs[Lane][0] = Srcs[0];
15396 LaneSrcs[Lane][1] = Srcs[1];
15399 assert(M1.size() == M2.size() && "Unexpected mask size");
15400 for (int i = 0, e = M1.size(); i != e; ++i)
15401 if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
15407 assert(Mask.size() == MergedMask.size() &&
"Unexpected mask size");
15408 for (int i = 0, e = MergedMask.size(); i != e; ++i) {
15412 assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
15413 "Unexpected mask element");
15418 if (MatchMasks(InLaneMask, RepeatMask)) {
15420 MergeMasks(InLaneMask, RepeatMask);
15425 std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
15428 if (MatchMasks(InLaneMask, RepeatMask)) {
15430 MergeMasks(InLaneMask, RepeatMask);
15439 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15441 if (LaneSrcs[Lane][0] >= 0)
15444 for (int i = 0; i != NumLaneElts; ++i) {
15445 int M = Mask[(Lane * NumLaneElts) + i];
15450 if (RepeatMask[i] < 0)
15451 RepeatMask[i] = M % NumLaneElts;
15453 if (RepeatMask[i] < NumElts) {
15454 if (RepeatMask[i] != M % NumLaneElts)
15456 LaneSrcs[Lane][0] = M / NumLaneElts;
15458 if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
15460 LaneSrcs[Lane][1] = M / NumLaneElts;
15464 if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
15469 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15470 int Src = LaneSrcs[Lane][0];
15471 for (int i = 0; i != NumLaneElts; ++i) {
15474 M = Src * NumLaneElts + i;
15475 NewMask[Lane * NumLaneElts + i] = M;
15482 if (isa<ShuffleVectorSDNode>(NewV1) &&
15483 cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
15486 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15487 int Src = LaneSrcs[Lane][1];
15488 for (int i = 0; i != NumLaneElts; ++i) {
15491 M = Src * NumLaneElts + i;
15492 NewMask[Lane * NumLaneElts + i] = M;
15499 if (isa<ShuffleVectorSDNode>(NewV2) &&
15500 cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
15503 for (int i = 0; i != NumElts; ++i) {
15508 NewMask[i] = RepeatMask[i % NumLaneElts];
15509 if (NewMask[i] < 0)
15512 NewMask[i] += (i / NumLaneElts) * NumLaneElts;
15525 int &HalfIdx1, int &HalfIdx2) {
15526 assert((Mask.size() == HalfMask.size() * 2) &&
15527 "Expected input mask to be twice as long as output");
15532 if (UndefLower == UndefUpper)
15535 unsigned HalfNumElts = HalfMask.size();
15536 unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
15539 for (unsigned i = 0; i != HalfNumElts; ++i) {
15540 int M = Mask[i + MaskIndexOffset];
15548 int HalfIdx = M / HalfNumElts;
15551 int HalfElt = M % HalfNumElts;
15555 if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
15556 HalfMask[i] = HalfElt;
15557 HalfIdx1 = HalfIdx;
15560 if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
15561 HalfMask[i] = HalfElt + HalfNumElts;
15562 HalfIdx2 = HalfIdx;
15577 int HalfIdx2, bool UndefLower,
15586 auto getHalfVector = [&](int HalfIdx) {
15589 SDValue V = (HalfIdx < 2 ? V1 : V2);
15590 HalfIdx = (HalfIdx % 2) * HalfNumElts;
15596 SDValue Half1 = getHalfVector(HalfIdx1);
15597 SDValue Half2 = getHalfVector(HalfIdx2);
15607 unsigned Offset = UndefLower ? HalfNumElts : 0;
15620 "Expected 256-bit or 512-bit vector");
15627 "Completely undef shuffle mask should have been simplified already");
15651 int HalfIdx1, HalfIdx2;
15656 assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
15659 unsigned NumLowerHalves =
15660 (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
15661 unsigned NumUpperHalves =
15662 (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
15663 assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");
15671 if (NumUpperHalves == 0)
15675 if (NumUpperHalves == 1) {
15679 if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
15682 Subtarget.hasFastVariableCrossLaneShuffle()))
15688 if (EltWidth == 64 && V2.isUndef())
15692 if (EltWidth == 8 && HalfIdx1 == 0 && HalfIdx2 == 1)
15704 assert(NumUpperHalves == 2 && "Half vector count went wrong");
15709 if (NumUpperHalves == 0) {
15712 if (Subtarget.hasAVX2() && EltWidth == 64)
15735 int NumLaneElts = NumElts / NumLanes;
15740 for (unsigned BroadcastSize : {16, 32, 64}) {
15749 for (int i = 0; i != NumElts; i += NumBroadcastElts)
15750 for (int j = 0; j != NumBroadcastElts; ++j) {
15751 int M = Mask[i + j];
15754 int &R = RepeatMask[j];
15755 if (0 != ((M % NumElts) / NumLaneElts))
15757 if (0 <= R && R != M)
15765 if (!FindRepeatingBroadcastMask(RepeatMask))
15773 for (int i = 0; i != NumElts; i += NumBroadcastElts)
15774 for (int j = 0; j != NumBroadcastElts; ++j)
15775 BroadcastMask[i + j] = j;
15779 if (BroadcastMask == Mask)
15797 auto ShuffleSubLanes = [&](int SubLaneScale) {
15798 int NumSubLanes = NumLanes * SubLaneScale;
15799 int NumSubLaneElts = NumLaneElts / SubLaneScale;
15804 int TopSrcSubLane = -1;
15810 for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
15815 for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
15816 int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
15819 int Lane = (M % NumElts) / NumLaneElts;
15820 if ((0 <= SrcLane) && (SrcLane != Lane))
15823 int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
15824 SubLaneMask[Elt] = LocalM;
15832 for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
15834 for (int i = 0; i != NumSubLaneElts; ++i) {
15835 if (M1[i] < 0 || M2[i] < 0)
15837 if (M1[i] != M2[i])
15843 auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
15844 if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
15848 for (int i = 0; i != NumSubLaneElts; ++i) {
15849 int M = SubLaneMask[i];
15852 assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
15853 "Unexpected mask element");
15854 RepeatedSubLaneMask[i] = M;
15859 int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
15860 TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
15861 Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
15866 if (Dst2SrcSubLanes[DstSubLane] < 0)
15869 assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
15870 "Unexpected source lane");
15874 for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
15875 int Lane = SubLane / SubLaneScale;
15876 auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
15877 for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
15878 int M = RepeatedSubLaneMask[Elt];
15881 int Idx = (SubLane * NumSubLaneElts) + Elt;
15882 RepeatedMask[Idx] = M + (Lane * NumLaneElts);
15888 for (int i = 0; i != NumElts; i += NumSubLaneElts) {
15889 int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
15890 if (SrcSubLane < 0)
15892 for (int j = 0; j != NumSubLaneElts; ++j)
15893 SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
15898 if (RepeatedMask == Mask || SubLaneMask == Mask)
15912 int MinSubLaneScale = 1, MaxSubLaneScale = 1;
15915 MinSubLaneScale = 2;
15917 (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2;
15919 if (Subtarget.hasBWI() && VT == MVT::v64i8)
15920 MinSubLaneScale = MaxSubLaneScale = 4;
15922 for (int Scale = MinSubLaneScale; Scale <= MaxSubLaneScale; Scale *= 2)
15923 if (SDValue Shuffle = ShuffleSubLanes(Scale))
15930 bool &ForceV1Zero, bool &ForceV2Zero,
15932 const APInt &Zeroable) {
15935 (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
15936 "Unexpected data type for VSHUFPD");
15938 "Illegal shuffle mask");
15940 bool ZeroLane[2] = {true, true};
15941 for (int i = 0; i < NumElts; ++i)
15942 ZeroLane[i & 1] &= Zeroable[i];
15946 bool IsSHUFPD = true;
15947 bool IsCommutable = true;
15949 for (int i = 0; i < NumElts; ++i) {
15954 int Val = (i & 6) + NumElts * (i & 1);
15955 int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
15956 if (Mask[i] < Val || Mask[i] > Val + 1)
15958 if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
15959 IsCommutable = false;
15960 SHUFPDMask[i] = Mask[i] % 2;
15963 if (!IsSHUFPD && !IsCommutable)
15966 if (!IsSHUFPD && IsCommutable)
15969 ForceV1Zero = ZeroLane[0];
15970 ForceV2Zero = ZeroLane[1];
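// SHUFPD can only choose, for element i, between the pair {Val, Val+1} of its
// expected source; IsCommutable records whether swapping the operands would also
// match, and a fully zeroable even/odd lane allows substituting a zero vector.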
15977 const APInt &Zeroable,
15980 assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
15981 "Unexpected data type for VSHUFPD");
15983 unsigned Immediate = 0;
15984 bool ForceV1Zero = false, ForceV2Zero = false;
16005 const APInt &Zeroable,
16007 assert(VT == MVT::v32i8 && "Unexpected type!");
16014 if (Zeroable.countl_one() < (Mask.size() - 8))
16026 { 0, 1, 2, 3, 16, 17, 18, 19,
16027 4, 5, 6, 7, 20, 21, 22, 23 });
16054 if (VT != MVT::v8f32 && VT != MVT::v8i32 && VT != MVT::v16i16 &&
16058 auto IsInterleavingPattern = [&](ArrayRef<int> Mask, unsigned Begin0,
16059 unsigned Begin1) {
16060 size_t Size = Mask.size();
16061 assert(Size % 2 == 0 && "Expected even mask size");
16062 for (unsigned I = 0; I < Size; I += 2) {
16063 if (Mask[I] != (int)(Begin0 + I / 2) ||
16064 Mask[I + 1] != (int)(Begin1 + I / 2))
16071 size_t FirstQtr = NumElts / 2;
16072 size_t ThirdQtr = NumElts + NumElts / 2;
16073 bool IsFirstHalf = IsInterleavingPattern(Mask, 0, NumElts);
16074 bool IsSecondHalf = IsInterleavingPattern(Mask, FirstQtr, ThirdQtr);
16075 if (!IsFirstHalf && !IsSecondHalf)
16085 if (Shuffles.size() != 2)
16088 auto *SVN1 = cast<ShuffleVectorSDNode>(Shuffles[0]);
16089 auto *SVN2 = cast<ShuffleVectorSDNode>(Shuffles[1]);
16092 if (IsInterleavingPattern(SVN1->getMask(), 0, NumElts) &&
16093 IsInterleavingPattern(SVN2->getMask(), FirstQtr, ThirdQtr)) {
16094 FirstHalf = Shuffles[0];
16095 SecondHalf = Shuffles[1];
16096 } else if (IsInterleavingPattern(SVN1->getMask(), FirstQtr, ThirdQtr) &&
16097 IsInterleavingPattern(SVN2->getMask(), 0, NumElts)) {
16098 FirstHalf = Shuffles[1];
16099 SecondHalf = Shuffles[0];
16128 assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
16129 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16135 if (V2.isUndef()) {
16138 Mask, Subtarget, DAG))
16148 unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
16149 ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
16162 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16167 Mask, DAG, Subtarget))
16180 Zeroable, Subtarget, DAG))
16185 Zeroable, Subtarget, DAG))
16196 !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
16203 if (V1IsInPlace || V2IsInPlace)
16205 Zeroable, Subtarget, DAG);
16210 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16217 if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace)))
16219 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16223 if (Subtarget.hasVLX())
16225 Zeroable, Subtarget, DAG))
16232 Zeroable, Subtarget, DAG);
16248 assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
16249 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16250 assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
16257 Zeroable, Subtarget, DAG))
16266 if (Subtarget.preferLowerShuffleAsShift())
16269 Subtarget, DAG, true))
16272 if (V2.isUndef()) {
16299 if (Subtarget.hasVLX()) {
16301 Zeroable, Subtarget, DAG))
16305 Zeroable, Subtarget, DAG))
16323 if (V1IsInPlace || V2IsInPlace)
16325 Zeroable, Subtarget, DAG);
16330 DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
16342 if (!V1IsInPlace && !V2IsInPlace)
16344 DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
16349 Zeroable, Subtarget, DAG);
16361 assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
16362 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16365 Zeroable, Subtarget, DAG))
16383 Zeroable, Subtarget, DAG))
16391 "Repeated masks must be half the mask width!");
16415 DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
16420 if (V2.isUndef()) {
16437 DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
16441 if (Subtarget.hasVLX())
16443 Zeroable, Subtarget, DAG))
16467 Zeroable, Subtarget, DAG);
16483 assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
16484 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16485 assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
16487 int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
16493 Zeroable, Subtarget, DAG))
16512 Zeroable, Subtarget, DAG))
16521 if (Subtarget.preferLowerShuffleAsShift()) {
16524 Subtarget, DAG, true))
16526 if (NumV2Elements == 0)
16536 bool Is128BitLaneRepeatedShuffle =
16538 if (Is128BitLaneRepeatedShuffle) {
16539 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
16555 if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements == 0)
16561 if (Subtarget.hasVLX()) {
16563 Zeroable, Subtarget, DAG))
16567 Zeroable, Subtarget, DAG))
16579 DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
16582 if (V2.isUndef()) {
16601 CastV1, CastV2, DAG);
16608 DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
16613 Zeroable, Subtarget, DAG);
16625 assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
16626 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
16627 assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
16633 DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
16642 Zeroable, Subtarget, DAG))
16662 Subtarget, DAG, false))
16673 DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
16676 if (V2.isUndef()) {
16691 DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
16704 DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
16709 Zeroable, Subtarget, DAG))
16713 if (Subtarget.hasBWI())
16719 DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
16724 DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
16748 assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
16749 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
16750 assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
16756 Zeroable, Subtarget, DAG))
16765 Zeroable, Subtarget, DAG))
16802 DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
16814 DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
16822 Zeroable, Subtarget, DAG))
16826 if (Subtarget.hasVBMI())
16832 DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
16837 DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
16843 if (Subtarget.hasVLX())
16845 Mask, Zeroable, DAG))
16872 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
16874 if (NumV2Elements == 1 && Mask[0] >= NumElts)
16876 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
16892 if (ElementBits < 32) {
16910 if (VT == MVT::v16f16 || VT == MVT::v16bf16) {
16942 "Unexpected element type size for 128bit shuffle.");
16952 assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
16955 if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
16956 (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
16957 unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
16968 bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
16970 isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
16980 bool IsInsert = true;
16982 for (int i = 0; i < 4; ++i) {
16983 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
16984 if (Widened128Mask[i] < 0)
16988 if (Widened128Mask[i] < 4) {
16989 if (Widened128Mask[i] != i) {
16995 if (V2Index >= 0 || Widened128Mask[i] != 4) {
17002 if (IsInsert && V2Index >= 0) {
17015 Widened128Mask.clear();
17021 int PermMask[4] = {-1, -1, -1, -1};
17023 for (int i = 0; i < 4; ++i) {
17024 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
17025 if (Widened128Mask[i] < 0)
17028 SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
17035 PermMask[i] = Widened128Mask[i] % 4;
17048 assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
17049 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17051 if (V2.isUndef()) {
17053 if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
17059 unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
17060 ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
17061 ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
17062 ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
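// VPERMILPD on v8f64 takes one immediate bit per element, selecting the low or high
// double within that element's own 128-bit lane.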
17074 V2, Subtarget, DAG))
17082 Zeroable, Subtarget, DAG))
17090 Zeroable, Subtarget, DAG))
17102 assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
17103 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17109 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17126 Zeroable, Subtarget, DAG))
17134 Zeroable, Subtarget, DAG))
17138 DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
17144 DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
17149 if (V2.isUndef() &&
17157 Zeroable, Subtarget, DAG))
17169 assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
17170 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17173 if (Subtarget.preferLowerShuffleAsShift())
17176 Subtarget, DAG, true))
17179 if (V2.isUndef()) {
17201 V2, Subtarget, DAG))
17212 Zeroable, Subtarget, DAG))
17216 if (Subtarget.hasBWI())
17230 Zeroable, Subtarget, DAG))
17242 assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
17243 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17245 int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
17251 DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
17255 if (Subtarget.preferLowerShuffleAsShift()) {
17258 Subtarget, DAG, true))
17260 if (NumV2Elements == 0)
17270 bool Is128BitLaneRepeatedShuffle =
17272 if (Is128BitLaneRepeatedShuffle) {
17273 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17286 Subtarget, DAG, false))
17289 if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements != 0)
17296 Zeroable, Subtarget, DAG))
17300 if (Subtarget.hasBWI())
17311 CastV1, CastV2, DAG);
17318 DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
17323 Zeroable, Subtarget, DAG))
17327 Zeroable, Subtarget, DAG))
17339 assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
17340 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
17341 assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
17347 DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
17362 Subtarget, DAG, false))
17370 if (V2.isUndef()) {
17382 RepeatedMask, Subtarget, DAG);
17387 Zeroable, Subtarget, DAG))
17391 Zeroable, Subtarget, DAG))
17398 DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
17410 assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
17411 assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
17412 assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
17418 DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
17449 Zeroable, Subtarget, DAG))
17453 Zeroable, Subtarget, DAG))
17459 DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
17463 DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget))
17467 Zeroable, Subtarget, DAG))
17474 Mask, Subtarget, DAG))
17479 bool V1InUse, V2InUse;
17481 DAG, V1InUse, V2InUse);
17488 DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
17492 if (Subtarget.hasVBMI())
17505 const APInt &Zeroable,
17509 "Cannot lower 512-bit vectors w/ basic ISA!");
17513 int NumElts = Mask.size();
17514 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
17516 if (NumV2Elements == 1 && Mask[0] >= NumElts)
17518 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
17531 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
17543 if (VT == MVT::v32f16 || VT == MVT::v32bf16) {
17544 if (!Subtarget.hasBWI())
17586 int NumElts = Mask.size();
17587 for (int i = 0; i != NumElts; ++i) {
17590 "Unexpected mask index.");
17595 if (ShiftAmt < 0) {
17602 if (ShiftAmt != M - i)
17605 assert(ShiftAmt >= 0 && "All undef?");
17619 int MaskOffset, const APInt &Zeroable) {
17620 int Size = Mask.size();
17622 auto CheckZeros = [&](int Shift, bool Left) {
17623 for (int j = 0; j < Shift; ++j)
17624 if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
17630 auto MatchShift = [&](int Shift, bool Left) {
17631 unsigned Pos = Left ? Shift : 0;
17632 unsigned Low = Left ? 0 : Shift;
17633 unsigned Len = Size - Shift;
17637 for (int Shift = 1; Shift != Size; ++Shift)
17638 for (bool Left : {true, false})
17639 if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
17654 const APInt &Zeroable,
17658 "Cannot lower 512-bit vectors w/o basic ISA!");
17660 int NumElts = Mask.size();
17661 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
17664 int SubvecElts = 0;
17666 for (int i = 0; i != NumElts; ++i) {
17667 if (Mask[i] >= 0) {
17671 Src = Mask[i] / NumElts;
17672 if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
17678 assert(SubvecElts != NumElts && "Identity shuffle?");
17681 SubvecElts = llvm::bit_floor<uint32_t>(SubvecElts);
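// Only a power-of-two-sized leading run can be extracted as a subvector, so round the
// matched prefix down; the remaining tail must then be zeroable (checked just below).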
17685 if ((int)Zeroable.countl_one() >= (NumElts - SubvecElts)) {
17686 assert(Src >= 0 && "Expected a source!");
17706 if (ShiftAmt >= 0) {
17717 ShiftAmt += WideElts - NumElts;
17720 Res = DAG.getNode(Opcode, DL, WideVT, Res,
17747 ExtVT = MVT::v2i64;
17750 ExtVT = MVT::v4i32;
17755 ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
17765 assert(Subtarget.hasBWI() && "Expected AVX512BW support");
17773 ExtVT = MVT::v64i8;
17783 if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
17784 (Subtarget.hasDQI() && (NumElems < 32)))
17794 int NumElements = Mask.size();
17796 int NumV1Elements = 0, NumV2Elements = 0;
17800 else if (M < NumElements)
17808 if (NumV2Elements > NumV1Elements)
17811 assert(NumV1Elements > 0 && "No V1 indices");
17813 if (NumV2Elements == 0)
17821 if (NumV1Elements == NumV2Elements) {
17822 int LowV1Elements = 0, LowV2Elements = 0;
17823 for (int M : Mask.slice(0, NumElements / 2))
17824 if (M >= NumElements)
17828 if (LowV2Elements > LowV1Elements)
17830 if (LowV2Elements == LowV1Elements) {
17831 int SumV1Indices = 0, SumV2Indices = 0;
17832 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17833 if (Mask[i] >= NumElements)
17835 else if (Mask[i] >= 0)
17837 if (SumV2Indices < SumV1Indices)
17839 if (SumV2Indices == SumV1Indices) {
17840 int NumV1OddIndices = 0, NumV2OddIndices = 0;
17841 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17842 if (Mask[i] >= NumElements)
17843 NumV2OddIndices += i % 2;
17844 else if (Mask[i] >= 0)
17845 NumV1OddIndices += i % 2;
17846 if (NumV2OddIndices < NumV1OddIndices)
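// Operand-commute heuristics: prefer masks that reference V1 at least as much as V2,
// breaking ties by low-half usage, then by the sum of used indices, and finally by
// how many odd positions each input occupies.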
17860 if (!V.getValueType().isSimple())
17863 MVT VT = V.getSimpleValueType().getScalarType();
17864 if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
17869 if ((VT == MVT::i16 || VT == MVT::i8) &&
17870 V.getSimpleValueType().getSizeInBits() < 512)
17873 auto HasMaskOperation = [&](SDValue V) {
17876 switch (V->getOpcode()) {
17895 if (!V->hasOneUse())
17901 if (HasMaskOperation(V))
17926 MVT VT = Op.getSimpleValueType();
17932 "Can't lower MMX shuffles");
17934 bool V1IsUndef = V1.isUndef();
17935 bool V2IsUndef = V2.isUndef();
17936 if (V1IsUndef && V2IsUndef)
17949 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
17951 for (int &M : NewMask)
17952 if (M >= NumElements)
17958 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
17959 (void)MaskUpperLimit;
17961 [&](
int M) {
return -1 <= M && M < MaskUpperLimit; }) &&
17962 "Out of bounds shuffle index");
17967 APInt KnownUndef, KnownZero;
17970 APInt Zeroable = KnownUndef | KnownZero;
17996 int NewNumElts = NumElements / 2;
18004 bool UsedZeroVector = false;
18006 "V2's non-undef elements are used?!");
18007 for (int i = 0; i != NewNumElts; ++i)
18009 WidenedMask[i] = i + NewNumElts;
18010 UsedZeroVector = true;
18014 if (UsedZeroVector)
18035 assert(NumElements == (int)Mask.size() &&
18036 "canonicalizeShuffleMaskWithHorizOp "
18037 "shouldn't alter the shuffle mask size");
18066 "Need AVX512 for custom VECTOR_COMPRESS lowering.");
18081 if (NumVecBits != 128 && NumVecBits != 256)
18084 if (NumElementBits == 32 || NumElementBits == 64) {
18085 unsigned NumLargeElements = 512 / NumElementBits;
18093 Subtarget, DAG, DL);
18097 Subtarget, DAG, DL);
18105 if (VecVT == MVT::v8i16 || VecVT == MVT::v8i8 || VecVT == MVT::v16i8 ||
18106 VecVT == MVT::v16i16) {
18111 Passthru = Passthru.isUndef()
18130 MVT VT = Op.getSimpleValueType();
18149 MVT VT = Op.getSimpleValueType();
18171 MVT CondVT = Cond.getSimpleValueType();
18172 unsigned CondEltSize = Cond.getScalarValueSizeInBits();
18173 if (CondEltSize == 1)
18184 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
18197 return DAG.getSelect(dl, VT, Mask, LHS, RHS);
18201 if (CondEltSize != EltSize) {
18218 !Subtarget.hasXOP()) {
18224 if (FreeCond && (FreeLHS || FreeRHS))
18244 case MVT::v16i16: {
18257 MVT VT = Op.getSimpleValueType();
18260 assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
18275 unsigned IdxVal = Idx->getAsZExtVal();
18281 if (VT == MVT::f32) {
18287 if (!Op.hasOneUse())
18292 User->getValueType(0) != MVT::i32))
18299 if (VT == MVT::i32 || VT == MVT::i64)
18313 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
18314 MVT EltVT = Op.getSimpleValueType();
18317 "Unexpected vector type in ExtractBitFromMaskVector");
18325 if (NumElts == 1) {
18337 unsigned IdxVal = IdxC->getZExtValue();
18354 MVT VT = N->getSimpleValueType(0);
18358 switch (User->getOpcode()) {
18364 return DemandedElts;
18366 DemandedElts.setBit(User->getConstantOperandVal(1));
18369 if (!User->getValueType(0).isSimple() ||
18370 !User->getValueType(0).isVector()) {
18372 return DemandedElts;
18380 return DemandedElts;
18383 return DemandedElts;
18387 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
18393 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
18432 unsigned IdxVal = IdxC->getZExtValue();
18446 IdxVal &= ElemsPerChunk - 1;
18453 MVT VT = Op.getSimpleValueType();
18455 if (VT == MVT::i16) {
18460 if (Subtarget.hasFP16())
18480 if (VT == MVT::i8) {
18485 int DWordIdx = IdxVal / 4;
18486 if (DWordIdx == 0 && DemandedElts == (DemandedElts & 15)) {
18490 int ShiftVal = (IdxVal % 4) * 8;
18497 int WordIdx = IdxVal / 2;
18498 if (DemandedElts == (DemandedElts & (3 << (WordIdx * 2)))) {
18502 int ShiftVal = (IdxVal % 2) * 8;
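// Extracting an i8 via a wider element: pull out the containing i32 (or i16) and
// shift the wanted byte down, avoiding PEXTRB when only part of the vector is demanded.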
18516 Mask[0] = static_cast<int>(IdxVal);
18532 int Mask[2] = { 1, -1 };
18551 if (!isa<ConstantSDNode>(Idx)) {
18570 MVT VT = Op.getSimpleValueType();
18575 if (EltVT == MVT::i1)
18582 auto *N2C = dyn_cast<ConstantSDNode>(N2);
18584 if (EltVT == MVT::bf16) {
18596 if (!(Subtarget.hasBWI() ||
18597 (Subtarget.hasAVX512() && EltSizeInBits >= 32) ||
18598 (Subtarget.hasSSE41() && (EltVT == MVT::f32 || EltVT == MVT::f64))))
18611 for (unsigned I = 0; I != NumElts; ++I)
18616 return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0,
18620 if (N2C->getAPIntValue().uge(NumElts))
18622 uint64_t IdxVal = N2C->getZExtValue();
18627 if (IsZeroElt || IsAllOnesElt) {
18630 if (IsAllOnesElt &&
18631 ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
18632 ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
18636 CstVectorElts[IdxVal] = OnesCst;
18645 for (unsigned i = 0; i != NumElts; ++i)
18646 BlendMask.push_back(i == IdxVal ? i + NumElts : i);
18662 if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
18663 (Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
18670 unsigned NumEltsIn128 = 128 / EltSizeInBits;
18672 "Vectors will always have power-of-two number of elements.");
18677 if (IdxVal >= NumEltsIn128 &&
18678 ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
18679 (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
18683 for (unsigned i = 0; i != NumElts; ++i)
18684 BlendMask.push_back(i == IdxVal ? i + NumElts : i);
18693 unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
18705 if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
18706 EltVT == MVT::f16 || EltVT == MVT::i64) {
18713 if (EltVT == MVT::i16 || EltVT == MVT::i8) {
18724 if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
18726 if (VT == MVT::v8i16) {
18730 assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
18735 assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
18738 return DAG.getNode(Opc, dl, VT, N0, N1, N2);
18742 if (EltVT == MVT::f32) {
18772 if (EltVT == MVT::i32 || EltVT == MVT::i64)
18782 MVT OpVT = Op.getSimpleValueType();
18803 "Expected an SSE type!");
18807 if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
18820 assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
18827 assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
18828 "Only vXi1 extract_subvectors need custom lowering");
18832 uint64_t IdxVal = Op.getConstantOperandVal(1);
18849 unsigned X86TargetLowering::getGlobalWrapperKind(
18850 const GlobalValue *GV, const unsigned char OpFlags) const {
18884 CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);
18887 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
18909 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
18922 return LowerGlobalOrExternal(Op, DAG, false);
18928 unsigned char OpFlags =
18930 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
18931 int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
18936 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlags), dl, PtrVT, Result);
18950 bool ForCall) const {
18955 const char *ExternalSym = nullptr;
18956 if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
18957 GV = G->getGlobal();
18960 const auto *ES = cast<ExternalSymbolSDNode>(Op);
18961 ExternalSym = ES->getSymbol();
18966 unsigned char OpFlags;
18984 int64_t GlobalOffset = 0;
18997 if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
19000 Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
19025 return LowerGlobalOrExternal(Op, DAG, false);
19029 const EVT PtrVT, unsigned ReturnReg,
19030 unsigned char OperandFlags,
19031 bool LoadGlobalBaseReg = false,
19032 bool LocalDynamic = false) {
19040 if (LocalDynamic && UseTLSDESC) {
19047 "Unexpected TLSDESC DAG");
19051 "Unexpected TLSDESC DAG");
19053 auto *CopyFromRegOp = CallSeqEndOp->getGluedUser();
19055 "Unexpected TLSDESC DAG");
19056 Ret = SDValue(CopyFromRegOp, 0);
19069 if (LoadGlobalBaseReg) {
19075 Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA, InGlue});
19077 Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA});
19125 bool Is64Bit, bool Is64BitLP64) {
19135 unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
19174 unsigned char OperandFlags = 0;
19231 if (Subtarget.is64Bit()) {
19243 PositionIndependent);
19250 unsigned char OpFlag = 0;
19251 unsigned WrapperKind = 0;
19255 bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
19290 unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
19316 SDValue TlsArray = Subtarget.is64Bit()
19331 if (Subtarget.is64Bit())
19362 if (Subtarget.is64Bit() && Subtarget.isTargetELF()) {
19403 "Unexpected opcode!");
19404 bool IsStrict = Op->isStrictFPOpcode();
19405 unsigned OpNo = IsStrict ? 1 : 0;
19407 MVT SrcVT = Src.getSimpleValueType();
19408 MVT VT = Op.getSimpleValueType();
19410 if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
19411 (VT != MVT::f32 && VT != MVT::f64))
19417 unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
19424 {Op.getOperand(0), InVec});
19444 "Unexpected opcode!");
19445 bool IsStrict = Op->isStrictFPOpcode();
19446 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
19447 MVT SrcVT = Src.getSimpleValueType();
19448 MVT VT = Op.getSimpleValueType();
19450 if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
19455 assert(Subtarget.hasFP16() && "Expected FP16");
19459 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
19460                              {Op.getOperand(0), InVec});
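// The next predicates check whether an int-to-fp of a value coming from a
// v4i32 vector can instead be performed as a full vector conversion on
// SSE2/AVX/AVX-512 targets for the requested destination type.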
19478 if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
19481 return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
19485 if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
19488 return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
19506 !isa<ConstantSDNode>(Extract.
getOperand(1)))
19527 if (FromVT != Vec128VT)
19551 MVT SrcVT =
X.getSimpleValueType();
19552 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
19557 if (!Subtarget.
hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
19569 unsigned ToIntOpcode =
19571 unsigned ToFPOpcode =
19590 bool IsStrict =
Op->isStrictFPOpcode();
19591 MVT VT =
Op->getSimpleValueType(0);
19592 SDValue Src =
Op->getOperand(IsStrict ? 1 : 0);
19594 if (Subtarget.hasDQI()) {
19595 assert(!Subtarget.hasVLX() &&
"Unexpected features");
19597 assert((Src.getSimpleValueType() == MVT::v2i64 ||
19598 Src.getSimpleValueType() == MVT::v4i64) &&
19599 "Unsupported custom type");
19602 assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
19604 MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
19614 Res = DAG.
getNode(
Op.getOpcode(),
DL, {WideVT, MVT::Other},
19615 {Op->getOperand(0), Src});
19618 Res = DAG.
getNode(
Op.getOpcode(),
DL, WideVT, Src);
19631 if (VT != MVT::v4f32 || IsSigned)
19643 for (
int i = 0; i != 4; ++i) {
19649 {
Op.getOperand(0), Elt});
19650 Chains[i] = SignCvts[i].getValue(1);
19661 {Chain, SignCvt, SignCvt});
19678 bool IsStrict = Op->isStrictFPOpcode();
19679 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
19681 MVT VT = Op.getSimpleValueType();
19689 DAG.
getNode(
Op.getOpcode(), dl, {NVT, MVT::Other}, {Chain, Src}),
19692 DAG.
getNode(
Op.getOpcode(), dl, NVT, Src), Rnd);
19697 if (VT == MVT::v4i32 && Subtarget.
hasSSE2() && IsSigned)
19699 if (VT == MVT::v8i32 && Subtarget.
hasAVX() && IsSigned)
19701 if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32))
19704 if (VT == MVT::v16i32)
19706 if (VT == MVT::v8i64 && Subtarget.hasDQI())
19709 if (Subtarget.hasDQI() && Subtarget.hasVLX() &&
19710 (VT == MVT::v2i64 || VT == MVT::v4i64))
19717 bool IsStrict = Op->isStrictFPOpcode();
19718 unsigned OpNo = IsStrict ? 1 : 0;
19721 MVT SrcVT = Src.getSimpleValueType();
19722 MVT VT = Op.getSimpleValueType();
19731 return LowerWin64_INT128_TO_FP(Op, DAG);
19740 if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
19752 if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
19758 assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
19759 "Unknown SINT_TO_FP to lower!");
19765 if (SrcVT == MVT::i32 && UseSSEReg)
19767 if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
19776 if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
19785 if (VT == MVT::f128 || !Subtarget.hasX87())
19789 if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
19793 ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
19803 Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
19804 std::pair<SDValue, SDValue> Tmp =
19805 BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);
19820 Tys = DAG.
getVTList(MVT::f80, MVT::Other);
19822 Tys = DAG.
getVTList(DstVT, MVT::Other);
19824 SDValue FILDOps[] = {Chain, Pointer};
19828 Chain = Result.getValue(1);
19838 SDValue FSTOps[] = {Chain, Result, StackSlot};
19846 DstVT,
DL, Chain, StackSlot,
19848 Chain = Result.getValue(1);
19851 return { Result, Chain };
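// Horizontal add/sub is only considered profitable here when the two source
// operands differ, when optimizing for size, or when the subtarget reports
// fast horizontal ops (see the IsSingleSource check below).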
19860 bool HasFastHOps = Subtarget.hasFastHorizontalOps();
19861 return !IsSingleSource || IsOptimizingSize || HasFastHOps;
19871 assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");
19888 static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
19896 APInt(64, 0x4330000000000000ULL))));
19899 APInt(64, 0x4530000000000000ULL))));
19913 MVT::v2f64, dl, CLod0.
getValue(1), CPIdx1,
19936 unsigned OpNo =
Op.getNode()->isStrictFPOpcode() ? 1 : 0;
19939 llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::f64);
19958 if (Op.getNode()->isStrictFPOpcode()) {
19963 {Chain, Or, Bias});
19970 Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
19972 return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
19986 if (
Op.getSimpleValueType() != MVT::v2f64)
19989 bool IsStrict =
Op->isStrictFPOpcode();
19991 SDValue N0 =
Op.getOperand(IsStrict ? 1 : 0);
19995 if (!Subtarget.hasVLX()) {
20003 {Op.getOperand(0), N0});
20015 {Op.getOperand(0), N0});
20025 llvm::bit_cast<double>(0x4330000000000000ULL), DL, MVT::v2f64);
20032 {Op.getOperand(0), Or, VBias});
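// vXi32 unsigned-to-fp: with AVX-512 the input is widened and converted
// directly; otherwise the vector is split into 16-bit halves (or biased with
// the 2^52 constant above) and the partial conversions are recombined.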
20039 bool IsStrict =
Op->isStrictFPOpcode();
20040 SDValue V =
Op->getOperand(IsStrict ? 1 : 0);
20041 MVT VecIntVT = V.getSimpleValueType();
20042 assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
20043 "Unsupported custom type");
20047 assert(!Subtarget.hasVLX() &&
"Unexpected features");
20048 MVT VT =
Op->getSimpleValueType(0);
20051 if (VT == MVT::v8f64)
20054 assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
20056 MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
20057 MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
20067 {
Op->getOperand(0), V});
20081 if (Subtarget.
hasAVX() && VecIntVT == MVT::v4i32 &&
20082 Op->getSimpleValueType(0) == MVT::v4f64) {
20102 {
Op.getOperand(0),
Or, VBias});
20118 bool Is128 = VecIntVT == MVT::v4i32;
20119 MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
20122 if (VecFloatVT !=
Op->getSimpleValueType(0))
20143 MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
20184 {
Op.getOperand(0), HighBitcast, VecCstFSub});
20186 {FHigh.
getValue(1), LowBitcast, FHigh});
20196 unsigned OpNo =
Op.getNode()->isStrictFPOpcode() ? 1 : 0;
20216 bool IsStrict =
Op->isStrictFPOpcode();
20217 unsigned OpNo = IsStrict ? 1 : 0;
20221 MVT SrcVT = Src.getSimpleValueType();
20222 MVT DstVT =
Op->getSimpleValueType(0);
20226 if (DstVT == MVT::f128)
20238 return LowerWin64_INT128_TO_FP(
Op, DAG);
20244 (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
20251 if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
20266 if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.
hasSSE2() &&
20271 if (SrcVT == MVT::i32 && Subtarget.
hasSSE2() && DstVT != MVT::f80 &&
20274 if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
20275 (DstVT == MVT::f32 || DstVT == MVT::f64))
20280 int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
20281 Align SlotAlign(8);
20284 if (SrcVT == MVT::i32) {
20287 SDValue Store1 = DAG.
getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
20290 std::pair<SDValue, SDValue> Tmp =
20291 BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG);
20298 assert(SrcVT == MVT::i64 &&
"Unexpected type in UINT_TO_FP");
20304 ValueToStore = DAG.
getBitcast(MVT::f64, ValueToStore);
20307 DAG.
getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign);
20324 APInt FF(64, 0x5F80000000000000ULL);
20327 Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();
20346 if (Subtarget.
isOSWindows() && DstVT == MVT::f32)
20350 DAG.
getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
20352 if (DstVT == MVT::f80)
20360 if (Subtarget.
isOSWindows() && DstVT == MVT::f32)
20377 bool IsStrict =
Op->isStrictFPOpcode();
20380 EVT DstTy =
Op.getValueType();
20385 if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
20394 bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
20398 if (!IsSigned && DstTy != MVT::i64) {
20401 assert(DstTy == MVT::i32 &&
"Unexpected FP_TO_UINT");
20405 assert(DstTy.getSimpleVT() <= MVT::i64 &&
20406 DstTy.getSimpleVT() >= MVT::i16 &&
20407 "Unknown FP_TO_INT to lower!");
20412 unsigned MemSize = DstTy.getStoreSize();
20421 if (UnsignedFixup) {
20441 bool LosesInfo =
false;
20442 if (TheVT == MVT::f64)
20446 else if (TheVT == MVT::f80)
20451 "FP conversion should have been exact");
20461 Chain =
Cmp.getValue(1);
20486 { Chain,
Value, FltOfs });
20487 Chain =
Value.getValue(1);
20497 assert(DstTy == MVT::i64 &&
"Invalid FP_TO_SINT to lower!");
20500 SDValue Ops[] = { Chain, StackSlot };
20503 assert(FLDSize <= MemSize &&
"Stack slot not big enough");
20507 Chain =
Value.getValue(1);
20530 MVT VT =
Op.getSimpleValueType();
20532 MVT InVT = In.getSimpleValueType();
20533 unsigned Opc =
Op.getOpcode();
20537 "Unexpected extension opcode");
20539 "Expected same number of elements");
20543 "Unexpected element type");
20547 "Unexpected element type");
20551 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
20552 assert(InVT == MVT::v32i8 &&
"Unexpected VT!");
20576 if (
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
20592 assert((VT == MVT::v16i8 || VT == MVT::v16i16) &&
"Unexpected VT.");
20606 MVT VT =
Op->getSimpleValueType(0);
20608 MVT InVT = In.getSimpleValueType();
20622 if (!Subtarget.hasBWI()) {
20631 MVT WideVT = ExtVT;
20656 return SelectedVal;
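// PACK-based truncation: wide integer elements are narrowed in log2 stages
// using PACKSS/PACKUS, provided enough sign/zero bits are known so the
// saturating packs behave like a plain truncate.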
20662 MVT SVT = In.getSimpleValueType();
20681 "Unexpected PACK opcode");
20688 EVT SrcVT = In.getValueType();
20691 if (SrcVT == DstVT)
20701 assert(SrcSizeInBits > DstSizeInBits &&
"Illegal truncation");
20709 EVT InVT = MVT::i16, OutVT = MVT::i8;
20718 if (SrcSizeInBits <= 128) {
20735 if (
Hi.isUndef()) {
20742 unsigned SubSizeInBits = SrcSizeInBits / 2;
20744 OutVT =
EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
20765 int Scale = 64 / OutVT.getScalarSizeInBits();
20778 assert(SrcSizeInBits >= 256 &&
"Expected 256-bit vector or greater");
20810 EVT SrcVT = In.getValueType();
20827 EVT SrcVT = In.getValueType();
20834 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20835 (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
20838 assert(NumSrcEltBits > NumDstEltBits && "Bad truncation");
20839 unsigned NumStages = Log2_32(NumSrcEltBits / NumDstEltBits);
20844 if ((DstSVT == MVT::i32 && SrcVT.getSizeInBits() <= 128) ||
20845 (DstSVT == MVT::i16 && SrcVT.getSizeInBits() <= (64 * NumStages)) ||
20846 (DstVT == MVT::v2i8 && SrcVT == MVT::v2i64 && Subtarget.hasSSSE3()))
20851 if (SrcVT == MVT::v4i64 && DstVT == MVT::v4i32 &&
20857 if (Subtarget.
hasAVX512() && NumStages > 1)
20860 unsigned NumPackedSignBits = std::min<unsigned>(NumDstEltBits, 16);
20861 unsigned NumPackedZeroBits = Subtarget.
hasSSE41() ? NumPackedSignBits : 8;
20882 if (DstSVT == MVT::i32 && NumSignBits != NumSrcEltBits &&
20886 unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20887 if (MinSignBits < NumSignBits) {
20895 if (In.getOpcode() ==
ISD::SRL && In->hasOneUse())
20897 if (*ShAmt == MinSignBits) {
20913 MVT SrcVT = In.getSimpleValueType();
20916 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20917 (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
20933 unsigned PackOpcode;
20946 MVT SrcVT = In.getSimpleValueType();
20950 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20951 (DstSVT == MVT::i8 || DstSVT == MVT::i16) &&
isPowerOf2_32(NumElems) &&
20956 if (Subtarget.hasSSSE3() && NumElems == 8) {
20957 if (SrcSVT == MVT::i16)
20959 if (SrcSVT == MVT::i32 && (DstSVT == MVT::i8 || !Subtarget.hasSSE41()))
20978 if (Subtarget.hasSSE41() || DstSVT == MVT::i8)
20981 if (SrcSVT == MVT::i16 || SrcSVT == MVT::i32)
20985 if (DstSVT == MVT::i16 && SrcSVT == MVT::i64) {
20997 MVT VT =
Op.getSimpleValueType();
20999 MVT InVT = In.getSimpleValueType();
21005 if (Subtarget.hasBWI()) {
21021 "Unexpected vector type.");
21023 assert((NumElts == 8 || NumElts == 16) &&
"Unexpected number of elements");
21035 if (InVT == MVT::v16i8) {
21039 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
21042 assert(InVT == MVT::v16i16 &&
"Unexpected VT!");
21068 if (Subtarget.hasDQI())
21075 MVT VT =
Op.getSimpleValueType();
21077 MVT InVT =
In.getSimpleValueType();
21079 "Invalid TRUNCATE operation");
21084 if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
21086 assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
21087 "Unexpected subtarget!");
21129 if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
21130 assert(VT == MVT::v32i8 &&
"Unexpected VT!");
21138 if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
21146 if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
21149 static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
21160 static const int ShufMask[] = {0, 2, 4, 6};
21162 DAG.
getBitcast(MVT::v4i32, OpHi), ShufMask);
21165 if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
21169 static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
21170 -1, -1, -1, -1, -1, -1, -1, -1,
21171 16, 17, 20, 21, 24, 25, 28, 29,
21172 -1, -1, -1, -1, -1, -1, -1, -1 };
21177 static const int ShufMask2[] = {0, 2, -1, -1};
21189 if (VT == MVT::v16i8 && InVT == MVT::v16i16)
21200 MVT SrcVT = Src.getSimpleValueType();
21202 assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported");
21221 if (VT == MVT::v8i32 && !Subtarget.
hasAVX2()) {
21234 bool IsStrict = Op->isStrictFPOpcode();
21237 MVT VT = Op->getSimpleValueType(0);
21238 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
21240 MVT SrcVT = Src.getSimpleValueType();
21247 return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
21248 {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
21249 {NVT, MVT::Other}, {Chain, Src})});
21250 return DAG.getNode(Op.getOpcode(), dl, VT,
21256 if (VT.isVector()) {
21257 if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
21258 MVT ResVT = MVT::v4i32;
21259 MVT TruncVT = MVT::v4i1;
21266 if (!IsSigned && !Subtarget.hasVLX()) {
21269 ResVT = MVT::v8i32;
21270 TruncVT = MVT::v8i1;
21271 Opc =
Op.getOpcode();
21281 Res = DAG.
getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
21284 Res = DAG.
getNode(Opc, dl, ResVT, Src);
21296 if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
21301 if (EleVT != MVT::i64)
21302 ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
21304 if (SrcVT != MVT::v8f16) {
21315 dl, {ResVT, MVT::Other}, {Chain, Src});
21338 if (VT.getVectorElementType() == MVT::i16) {
21341 "Expected f32/f64 vector!");
21346 dl, {NVT, MVT::Other}, {Chain, Src});
21362 if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
21363 assert(!IsSigned &&
"Expected unsigned conversion!");
21369 if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
21370 (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) &&
21372 assert(!IsSigned &&
"Expected unsigned conversion!");
21373 assert(!Subtarget.hasVLX() &&
"Unexpected features!");
21374 MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
21375 MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
21401 if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
21402 (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) &&
21404 assert(!Subtarget.hasVLX() &&
"Unexpected features!");
21405 MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
21415 Res = DAG.
getNode(
Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
21419 Res = DAG.
getNode(
Op.getOpcode(), dl, MVT::v8i64, Src);
21430 if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
21431 if (!Subtarget.hasVLX()) {
21440 Tmp = DAG.
getNode(
Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
21448 assert(Subtarget.hasDQI() && Subtarget.hasVLX() &&
"Requires AVX512DQVL");
21454 return DAG.
getNode(Opc, dl, {VT, MVT::Other}, {
Op->getOperand(0), Tmp});
21457 return DAG.
getNode(Opc, dl, VT, Tmp);
21462 if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||
21463 (VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||
21464 (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {
21465 assert(!IsSigned &&
"Expected unsigned conversion!");
21474 bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
21476 if (!IsSigned && UseSSEReg) {
21483 if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||
21484 (VT == MVT::i64 && Subtarget.is64Bit()))) {
21485 unsigned DstBits = VT.getScalarSizeInBits();
21515 if (VT == MVT::i64)
21518 assert(VT == MVT::i32 &&
"Unexpected VT!");
21523 if (Subtarget.is64Bit()) {
21546 if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
21547 assert(IsSigned &&
"Expected i16 FP_TO_UINT to have been promoted!");
21562 if (UseSSEReg && IsSigned)
21566 if (SrcVT == MVT::f128) {
21573 MakeLibCallOptions CallOptions;
21574 std::pair<SDValue, SDValue> Tmp =
21575 makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
21584 if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
21590 llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
21596 EVT DstVT =
Op.getSimpleValueType();
21597 MVT SrcVT = Src.getSimpleValueType();
21602 if (SrcVT == MVT::f16)
21609 return LRINT_LLRINTHelper(
Op.getNode(), DAG);
21614 EVT DstVT =
N->getValueType(0);
21616 EVT SrcVT = Src.getValueType();
21618 if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
21631 EVT OtherVT = UseSSE ? SrcVT : DstVT;
21633 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
21638 assert(DstVT == MVT::i64 &&
"Invalid LRINT/LLRINT to lower!");
21639 Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
21646 Chain = Src.getValue(1);
21651 StoreOps, DstVT, MPI, std::nullopt,
21654 return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
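// Saturating fp-to-int (FP_TO_*INT_SAT): when the saturation bounds are
// exactly representable in the source FP type the input is clamped to
// [MinInt, MaxInt] first and then converted; otherwise selects patch up the
// out-of-range and NaN cases afterwards.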
21671 EVT SrcVT = Src.getValueType();
21672 EVT DstVT =
Node->getValueType(0);
21680 EVT SatVT = cast<VTSDNode>(
Node->getOperand(1))->getVT();
21684 assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
21685 "Expected saturation width smaller than result width");
21688 if (TmpWidth < 32) {
21695 if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
21702 if (SatWidth < TmpWidth)
21707 APInt MinInt, MaxInt;
21732 if (AreExactFloatBounds) {
21733 if (DstVT != TmpVT) {
21741 SDValue FpToInt = DAG.
getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
21755 SDValue FpToInt = DAG.
getNode(FpToIntOpcode, dl, DstVT, BothClamped);
21775 if (DstVT != TmpVT) {
21798 if (!IsSigned || DstVT != TmpVT) {
21809 bool IsStrict = Op->isStrictFPOpcode();
21812 MVT VT = Op.getSimpleValueType();
21815 MVT SVT = In.getSimpleValueType();
21819 if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 &&
21823 if ((SVT == MVT::v8f16 && Subtarget.hasF16C()) ||
21827 if (SVT == MVT::f16) {
21828 if (Subtarget.hasFP16())
21831 if (VT != MVT::f32) {
21836 {MVT::f32, MVT::Other}, {Chain,
In})});
21842 if (!Subtarget.hasF16C()) {
21846 assert(VT == MVT::f32 && SVT == MVT::f16 &&
"unexpected extend libcall");
21857 Entry.IsSExt =
false;
21858 Entry.IsZExt =
true;
21859 Args.push_back(Entry);
21864 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
21902 assert(Subtarget.hasF16C() &&
"Unexpected features!");
21903 if (SVT == MVT::v2f16)
21910 {
Op->getOperand(0), Res});
21912 }
else if (VT == MVT::v4f64 || VT == MVT::v8f64) {
21916 assert(SVT == MVT::v2f32 &&
"Only customize MVT::v2f32 type legalization!");
21922 {
Op->getOperand(0), Res});
21927 bool IsStrict = Op->isStrictFPOpcode();
21932 MVT VT = Op.getSimpleValueType();
21933 MVT SVT = In.getSimpleValueType();
21935 if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))
21938 if (VT == MVT::f16 && (SVT == MVT::f64 || SVT == MVT::f32) &&
21939 !Subtarget.hasFP16() && (SVT == MVT::f64 || !Subtarget.hasF16C())) {
21951 Entry.IsSExt =
false;
21952 Entry.IsZExt =
true;
21953 Args.push_back(Entry);
21957 : RTLIB::FPROUND_F32_F16),
21959 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
21976 ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
21977 Subtarget.hasAVXNECONVERT()))
21982 if (VT.
getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
21983 if (!Subtarget.hasF16C() || SVT.
getScalarType() != MVT::f32)
21997 {Chain, Res, Rnd});
22019 bool IsStrict =
Op->isStrictFPOpcode();
22020 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
22021 assert(Src.getValueType() == MVT::i16 &&
Op.getValueType() == MVT::f32 &&
22032 {
Op.getOperand(0), Res});
22048 bool IsStrict =
Op->isStrictFPOpcode();
22049 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
22050 assert(Src.getValueType() == MVT::f32 &&
Op.getValueType() == MVT::i16 &&
22083 MVT SVT =
Op.getOperand(0).getSimpleValueType();
22084 if (SVT == MVT::f32 && ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
22085 Subtarget.hasAVXNECONVERT())) {
22094 MakeLibCallOptions CallOptions;
22097 makeLibCall(DAG, LC, MVT::f16,
Op.getOperand(0), CallOptions,
DL).first;
22113 bool IsFP = Op.getSimpleValueType().isFloatingPoint();
22114 if (IsFP && !Subtarget.hasSSE3())
22116 if (!IsFP && !Subtarget.hasSSSE3())
22122 LHS.getOperand(0) != RHS.getOperand(0) ||
22123 !isa<ConstantSDNode>(LHS.getOperand(1)) ||
22124 !isa<ConstantSDNode>(RHS.getOperand(1)) ||
22131 switch (
Op.getOpcode()) {
22141 unsigned LExtIndex =
LHS.getConstantOperandVal(1);
22142 unsigned RExtIndex =
RHS.getConstantOperandVal(1);
22143 if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
22147 if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
22151 EVT VecVT =
X.getValueType();
22153 unsigned NumLanes =
BitWidth / 128;
22156 "Not expecting illegal vector widths here");
22161 unsigned LaneIdx = LExtIndex / NumEltsPerLane;
22163 LExtIndex %= NumEltsPerLane;
22178 assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
22179        "Only expecting float/double");
22190 MVT VT = Op.getSimpleValueType();
22197 Point5Pred.next(true);
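// FABS and FNEG are lowered to a bitwise AND/XOR with a sign-bit mask; scalar
// f32/f64 values are operated on in a "fake vector" (v4f32/v2f64) register so
// the same packed logic ops can be reused.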
22211 "Wrong opcode for lowering FABS or FNEG.");
22223 MVT VT = Op.getSimpleValueType();
22225 bool IsF128 = (VT == MVT::f128);
22228 "Unexpected type in LowerFABSorFNEG");
22238 bool IsFakeVector = !VT.isVector() && !IsF128;
22241 LogicVT = (VT == MVT::f64) ? MVT::v2f64
22242 : (VT == MVT::f32) ? MVT::v4f32
22260 return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
22265 SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
22276 MVT VT =
Op.getSimpleValueType();
22287 bool IsF128 = (VT == MVT::f128);
22290 "Unexpected type in LowerFCOPYSIGN");
22299 bool IsFakeVector = !VT.
isVector() && !IsF128;
22302 LogicVT = (VT == MVT::f64) ? MVT::v2f64
22303 : (VT == MVT::f32) ? MVT::v4f32
22323 APFloat APF = Op0CN->getValueAPF();
22335 return !IsFakeVector ?
Or
22343 MVT VT =
Op.getSimpleValueType();
22346 assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
22347 "Unexpected type for FGETSIGN");
22350 MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64);
22365 if (Src.getValueType().getScalarSizeInBits() < 32)
22376 if (Src.getValueType() == MVT::i64 &&
22419template <
typename F>
22421 EVT VecVT,
EVT CmpVT,
bool HasPT,
F SToV) {
22427 if (VecVT != CmpVT)
22436 if (VecVT != CmpVT)
22455 EVT OpVT = X.getValueType();
22470 auto IsVectorBitCastCheap = [](SDValue X) {
22472 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
22475 if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
22476 !IsOrXorXorTreeCCZero)
22482 bool NoImplicitFloatOps =
22484 Attribute::NoImplicitFloat);
22485 if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
22486 ((OpSize == 128 && Subtarget.
hasSSE2()) ||
22487 (OpSize == 256 && Subtarget.
hasAVX()) ||
22489 bool HasPT = Subtarget.
hasSSE41();
22494 bool PreferKOT = Subtarget.preferMaskRegisters();
22495 bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
22497 EVT VecVT = MVT::v16i8;
22498 EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
22499 if (OpSize == 256) {
22500 VecVT = MVT::v32i8;
22501 CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
22503 EVT CastVT = VecVT;
22504 bool NeedsAVX512FCast =
false;
22505 if (OpSize == 512 || NeedZExt) {
22506 if (Subtarget.hasBWI()) {
22507 VecVT = MVT::v64i8;
22508 CmpVT = MVT::v64i1;
22512 VecVT = MVT::v16i32;
22513 CmpVT = MVT::v16i1;
22514 CastVT = OpSize == 512 ? VecVT
22515 : OpSize == 256 ? MVT::v8i32
22517 NeedsAVX512FCast =
true;
22522 bool TmpZext =
false;
22523 EVT TmpCastVT = CastVT;
22527 if (OrigSize < OpSize) {
22528 if (OrigSize == 128) {
22529 TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
22532 }
else if (OrigSize == 256) {
22533 TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
22540 if (!NeedZExt && !TmpZext)
22548 if (IsOrXorXorTreeCCZero) {
22557 if (VecVT != CmpVT) {
22559 }
else if (HasPT) {
22566 if (VecVT != CmpVT) {
22567 EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64
22568 : CmpVT == MVT::v32i1 ? MVT::i32
22575 DAG.
getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, Cmp);
22584 assert(Cmp.getValueType() == MVT::v16i8 &&
22585 "Non 128-bit vector on pre-SSE41 target");
22603 EVT VT = MVT::Other;
22607 assert(Op.getOpcode() == unsigned(BinOp) &&
22608        "Unexpected bit reduction opcode");
22612 for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
22615 if (I->getOpcode() == unsigned(BinOp)) {
22628 auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1));
22634 if (M == SrcOpMap.
end()) {
22635 VT = Src.getValueType();
22637 if (!SrcOpMap.
empty() && VT != SrcOpMap.
begin()->first.getValueType())
22641 M = SrcOpMap.
insert(std::make_pair(Src, EltCount)).first;
22646 unsigned CIdx =
Idx->getZExtValue();
22647 if (M->second[CIdx])
22649 M->second.setBit(CIdx);
22655 SrcMask->push_back(SrcOpMap[
SrcOp]);
22658 for (
const auto &
I : SrcOpMap)
22659 if (!
I.second.isAllOnes())
22671 EVT VT =
LHS.getValueType();
22674 assert(ScalarSize == 1 &&
"Element Mask vs Vector bitwidth mismatch");
22689 APInt Mask = OriginalMask;
22691 auto MaskBits = [&](
SDValue Src) {
22692 if (Mask.isAllOnes())
22694 EVT SrcVT = Src.getValueType();
22703 if (IntVT != MVT::i64)
22706 MVT::i32, MVT::i32);
22708 MVT::i32, MVT::i32);
22725 bool UsePTEST = Subtarget.
hasSSE41();
22726 if (!UsePTEST && !Mask.isAllOnes() && ScalarSize > 32)
22730 unsigned TestSize = UseKORTEST ? 512 : (Subtarget.
hasAVX() ? 256 : 128);
22734 if (ScalarSize > TestSize) {
22735 if (!Mask.isAllOnes())
22749 VT = Split.first.getValueType();
22753 }
else if (!UsePTEST && !KnownRHS.
isZero()) {
22756 MVT SVT = ScalarSize >= 32 ? MVT::i32 : MVT::i8;
22765 VT = Split.first.getValueType();
22777 VT = Split.first.getValueType();
22803 MVT MaskVT = ScalarSize >= 32 ? MVT::v4i32 : MVT::v16i8;
22824 if (!CmpNull && !CmpAllOnes)
22828 if (!Subtarget.
hasSSE2() || !
Op->hasOneUse())
22836 switch (
Op.getOpcode()) {
22840 Op.getScalarValueSizeInBits());
22845 if (
auto *Cst = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
22846 Mask = Cst->getAPIntValue();
22847 Op =
Op.getOperand(0);
22860 EVT VT = VecIns[0].getValueType();
22862 [VT](
SDValue V) {
return VT == V.getValueType(); }) &&
22863 "Reduction source vector mismatch");
22871 for (
unsigned Slot = 0, e = VecIns.
size(); e - Slot > 1;
22872 Slot += 2, e += 1) {
22883 CC, Mask, Subtarget, DAG, X86CC);
22892 EVT MatchVT =
Match.getValueType();
22896 CC, Mask, Subtarget, DAG, X86CC);
22900 if (Mask.isAllOnes()) {
22901 assert(!
Op.getValueType().isVector() &&
22902 "Illegal vector type for reduction pattern");
22904 if (Src.getValueType().isFixedLengthVector() &&
22905 Src.getValueType().getScalarType() == MVT::i1) {
22911 EVT LHSVT =
LHS.getValueType();
22912 ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
22924 SDValue Inner = Src.getOperand(0);
22926 if (llvm::has_single_bit<uint32_t>(InnerVT.
getSizeInBits())) {
22932 SrcMask, Subtarget, DAG, X86CC);
22978 bool NeedCF =
false;
22979 bool NeedOF =
false;
22992 switch (
Op->getOpcode()) {
22997 if (
Op.getNode()->getFlags().hasNoSignedWrap())
23010 if (
Op.getResNo() != 0 || NeedOF || NeedCF) {
23015 unsigned Opcode = 0;
23016 unsigned NumOperands = 0;
23079 return SDValue(New.getNode(), 1);
23088 return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
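// EmitCmp: i16 compares against immediates that do not fit in a sign-extended
// i8 are widened to i32 on targets without fast 16-bit immediates, avoiding a
// length-changing prefix.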
23092 assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
23093 CmpVT == MVT::i32 || CmpVT == MVT::i64) &&
"Unexpected VT!");
23099 if (CmpVT == MVT::i16 && !Subtarget.hasFastImm16() &&
23102 auto *COp0 = dyn_cast<ConstantSDNode>(Op0);
23103 auto *COp1 = dyn_cast<ConstantSDNode>(Op1);
23105 if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
23106 (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
23107 unsigned ExtendOp =
23122 Op0 = DAG.
getNode(ExtendOp, dl, CmpVT, Op0);
23123 Op1 = DAG.
getNode(ExtendOp, dl, CmpVT, Op1);
23144 return Add.getValue(1);
23153 return Add.getValue(1);
23167 bool X86TargetLowering::optimizeFMulOrFDivAsShiftAddBitcast(
23172 EVT FPVT = N->getValueType(0);
23187 EVT VT = Op.getValueType();
23198 return Subtarget.hasFastVectorFSQRT();
23199 return Subtarget.hasFastScalarFSQRT();
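// Square-root estimate hook: returns an RSQRTSS/RSQRTPS-based estimate for
// the supported types, defaulting to one Newton-Raphson refinement step
// (zero for FP16, where the hardware estimate is already accurate enough).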
23206 int &RefinementSteps,
23207 bool &UseOneConstNR,
23208 bool Reciprocal)
const {
23210 EVT VT =
Op.getValueType();
23220 if ((VT == MVT::f32 && Subtarget.
hasSSE1()) ||
23221 (VT == MVT::v4f32 && Subtarget.
hasSSE1() && Reciprocal) ||
23222 (VT == MVT::v4f32 && Subtarget.
hasSSE2() && !Reciprocal) ||
23223 (VT == MVT::v8f32 && Subtarget.
hasAVX()) ||
23226 RefinementSteps = 1;
23228 UseOneConstNR =
false;
23232 if (RefinementSteps == 0 && !Reciprocal)
23238 Subtarget.hasFP16()) {
23239 assert(Reciprocal &&
"Don't replace SQRT with RSQRT for half type");
23241 RefinementSteps = 0;
23243 if (VT == MVT::f16) {
23260 int &RefinementSteps)
const {
23262 EVT VT =
Op.getValueType();
23271 if ((VT == MVT::f32 && Subtarget.
hasSSE1()) ||
23272 (VT == MVT::v4f32 && Subtarget.
hasSSE1()) ||
23273 (VT == MVT::v8f32 && Subtarget.
hasAVX()) ||
23282 RefinementSteps = 1;
23290 Subtarget.hasFP16()) {
23292 RefinementSteps = 0;
23294 if (VT == MVT::f16) {
23313 unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
23318 X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
23326 "Unexpected divisor!");
23334 EVT VT = N->getValueType(0);
23336 if (VT != MVT::i16 && VT != MVT::i32 &&
23337 !(Subtarget.is64Bit() && VT == MVT::i64))
23341 if (Divisor == 2 ||
23368 unsigned AndBitWidth =
And.getValueSizeInBits();
23389 if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
23393 Src.getValueType());
23399 if (!Src.getNode())
23404 Src = Src.getOperand(0);
23425 SDValue &Op1, bool &IsAlwaysSignaling) {
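// Translates an ISD floating-point condition code into the immediate used by
// CMPPS/CMPSS-style compares; some predicates require swapping the operands,
// and IsAlwaysSignaling reports whether the chosen predicate signals on quiet
// NaNs regardless of the requested behaviour.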
23438 switch (SetCCOpcode) {
23444 case ISD::SETGT: Swap = true; [[fallthrough]];
23448 case ISD::SETGE: Swap = true; [[fallthrough]];
23466 switch (SetCCOpcode) {
23468 IsAlwaysSignaling = true;
23478 IsAlwaysSignaling = false;
23490 "Unsupported VTs!");
23514 MVT VT =
Op.getSimpleValueType();
23516 "Cannot set masked compare for this operation");
23526 return DAG.
getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
23535 auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
23536 if (!BV || !V.getValueType().isSimple())
23539 MVT VT = V.getSimpleValueType();
23544 for (
unsigned i = 0; i < NumElts; ++i) {
23545 auto *Elt = dyn_cast<ConstantSDNode>(BV->
getOperand(i));
23550 const APInt &EltC = Elt->getAPIntValue();
23575 if (VET != MVT::i8 && VET != MVT::i16)
23626 SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
23627 SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
23629 MVT VT = Op->getSimpleValueType(0);
23636 assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
23637 EltVT == MVT::f64);
23641 if (Subtarget.
hasAVX512() && !Subtarget.hasVLX())
23658 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
23659 {Chain, Op0, Op1, CC});
23678 (!IsStrict || Subtarget.hasVLX() ||
23683 (Num == 32 && (EltVT == MVT::f16 || EltVT == MVT::bf16)));
23695 bool IsAlwaysSignaling;
23697 if (!Subtarget.
hasAVX()) {
23705 if (IsStrict && IsAlwaysSignaling && !IsSignaling)
23709 if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
23711 Opc, dl, {VT, MVT::Other},
23726 unsigned CombineOpc;
23741 Opc, dl, {VT, MVT::Other},
23744 Opc, dl, {VT, MVT::Other},
23754 Cmp = DAG.
getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
23758 Opc, dl, {VT, MVT::Other},
23760 Chain = Cmp.getValue(1);
23769 SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
23771 Opc, dl, {VT, MVT::Other},
23773 Chain = Cmp.getValue(1);
23780 Op.getSimpleValueType().getFixedSizeInBits()) {
23785 Cmp = DAG.
getSetCC(dl,
Op.getSimpleValueType(), Cmp,
23800 assert(!IsStrict &&
"Strict SETCC only handles FP operands.");
23804 "Expected operands with same type!");
23806 "Invalid number of packed elements for source and destination!");
23811 "Value types for source and destination must be the same!");
23818 "Unexpected operand type");
23846 return DAG.
getNode(Opc, dl, VT, Op0, Op1,
23934 bool Invert =
false;
23939 case ISD::SETUGT: Invert =
true; [[fallthrough]];
23941 case ISD::SETULT: Invert =
true; [[fallthrough]];
23951 Result = DAG.
getNOT(dl, Result, VT);
23977 if (VT == MVT::v2i64) {
23988 static const int MaskHi[] = { 1, 1, 3, 3 };
23999 static const int MaskHi[] = { 1, 1, 3, 3 };
24013 static const int MaskLo[] = {0, 0, 2, 2};
24023 : 0x0000000080000000ULL,
24038 static const int MaskHi[] = { 1, 1, 3, 3 };
24039 static const int MaskLo[] = { 0, 0, 2, 2 };
24048 Result = DAG.
getNOT(dl, Result, MVT::v4i32);
24056 assert(Subtarget.
hasSSE2() && !FlipSigns &&
"Don't know how to lower!");
24066 static const int Mask[] = { 1, 0, 3, 2 };
24071 Result = DAG.
getNOT(dl, Result, MVT::v4i32);
24091 Result = DAG.
getNOT(dl, Result, VT);
24109 if (!(Subtarget.
hasAVX512() && VT == MVT::v16i1) &&
24110 !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
24111 !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
24124 bool KTestable =
false;
24125 if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
24127 if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
24205 if (VT == MVT::i32 || VT == MVT::i64 || Op0->
hasOneUse()) {
24236 SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
24245 MVT VT = Op->getSimpleValueType(0);
24249 assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
24251 SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
24252 SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
24255 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
24269 "Unexpected setcc expansion!");
24287 if (
auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
24288 const APInt &Op1Val = Op1C->getAPIntValue();
24293 APInt Op1ValPlusOne = Op1Val + 1;
24305 SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1,
CC, dl, DAG, X86CC);
24310 if (Subtarget.hasAVX10_2()) {
24329 dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
24347 assert(
LHS.getSimpleValueType().isInteger() &&
"SETCCCARRY is integer only.");
24364static std::pair<SDValue, SDValue>
24366 assert(
Op.getResNo() == 0 &&
"Unexpected result number!");
24370 unsigned BaseOp = 0;
24372 switch (Op.getOpcode()) {
24404 Overflow = Value.getValue(1);
24407 return std::make_pair(Value, Overflow);
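// Maps overflow-producing arithmetic nodes (SADDO/UADDO/SSUBO/...) onto the
// X86ISD equivalent that also defines EFLAGS, returning the value and the
// overflow flag as a pair.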
24421 assert(
Op->getValueType(1) == MVT::i8 &&
"Unexpected VT!");
24427 unsigned Opc =
Op.getOpcode();
24431 if (
Op.getResNo() == 1 &&
24444 SDValue VOp0 = V.getOperand(0);
24446 unsigned Bits = V.getValueSizeInBits();
24452 unsigned X86CC,
const SDLoc &
DL,
24456 EVT VT =
LHS.getValueType();
24462 auto SplatLSB = [&](
EVT SplatVT) {
24466 if (CmpVT.
bitsGT(SplatVT))
24468 else if (CmpVT.
bitsLT(SplatVT))
24478 return SplatLSB(VT);
24482 isa<ConstantSDNode>(
RHS)) {
24490 auto isIdentityPatternZero = [&]() {
24491 switch (
RHS.getOpcode()) {
24497 if (
RHS.getOperand(0) ==
LHS ||
RHS.getOperand(1) ==
LHS) {
24498 Src1 =
RHS.getOperand(
RHS.getOperand(0) ==
LHS ? 1 : 0);
24507 if (
RHS.getOperand(0) ==
LHS) {
24508 Src1 =
RHS.getOperand(1);
24517 auto isIdentityPatternOnes = [&]() {
24518 switch (
LHS.getOpcode()) {
24522 if (
LHS.getOperand(0) ==
RHS ||
LHS.getOperand(1) ==
RHS) {
24523 Src1 =
LHS.getOperand(
LHS.getOperand(0) ==
RHS ? 1 : 0);
24540 if (!Subtarget.
canUseCMOV() && isIdentityPatternZero()) {
24547 if (!Subtarget.
canUseCMOV() && isIdentityPatternOnes()) {
24584 bool AddTest =
true;
24603 VT ==
Cond.getOperand(0).getSimpleValueType() &&
Cond->hasOneUse()) {
24605 bool IsAlwaysSignaling;
24608 CondOp0, CondOp1, IsAlwaysSignaling);
24618 if (SSECC < 8 || Subtarget.
hasAVX()) {
24638 MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
24643 MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
24664 !
isSoftF16(
Cond.getOperand(0).getSimpleValueType(), Subtarget)) {
24670 Op1 =
Op.getOperand(1);
24671 Op2 =
Op.getOperand(2);
24699 if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) &&
24700 ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
24704 DL, DAG, Subtarget)) {
24706 } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) &&
24707 Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&
24720 Shift = DAG.getNOT(DL, Shift, VT);
24740 unsigned CondOpcode =
Cond.getOpcode();
24743 CC =
Cond.getOperand(0);
24746 bool IllegalFPCMov =
false;
24749 IllegalFPCMov = !
hasFPCMov(cast<ConstantSDNode>(
CC)->getSExtValue());
24811 if (
Op.getValueType() == MVT::i8 &&
24814 if (
T1.getValueType() == T2.getValueType() &&
24829 if ((
Op.getValueType() == MVT::i8 && Subtarget.
canUseCMOV()) ||
24848 MVT VT =
Op->getSimpleValueType(0);
24850 MVT InVT = In.getSimpleValueType();
24866 MVT WideVT = ExtVT;
24877 if ((Subtarget.hasDQI() && WideEltVT.
getSizeInBits() >= 32) ||
24879 V = DAG.
getNode(
Op.getOpcode(), dl, WideVT, In);
24883 V = DAG.
getSelect(dl, WideVT, In, NegOne, Zero);
24903 MVT InVT = In.getSimpleValueType();
24921 MVT VT =
Op->getSimpleValueType(0);
24922 MVT InVT = In.getSimpleValueType();
24928 if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
24930 if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
24938 unsigned Opc =
Op.getOpcode();
24948 InVT = In.getSimpleValueType();
24958 return DAG.
getNode(
Op.getOpcode(), dl, VT, In);
24965 return DAG.
getNode(ExtOpc, dl, VT, In);
24969 if (Subtarget.
hasAVX()) {
24976 for (
int i = 0; i != HalfNumElts; ++i)
24977 HiMask[i] = HalfNumElts + i;
24994 unsigned Scale = InNumElts / NumElts;
24996 for (
unsigned I = 0;
I != NumElts; ++
I)
24997 ShuffleMask.
append(Scale,
I);
25008 if (InVT != MVT::v4i32) {
25009 MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
25018 for (
unsigned i = 0; i != DestElts; ++i)
25019 Mask[i * Scale + (Scale - 1)] = i;
25029 if (VT == MVT::v2i64) {
25033 SignExt = DAG.
getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
25042 MVT VT =
Op->getSimpleValueType(0);
25044 MVT InVT = In.getSimpleValueType();
25052 "Expected same number of elements");
25056 "Unexpected element type");
25060 "Unexpected element type");
25062 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
25063 assert(InVT == MVT::v32i8 &&
"Unexpected VT!");
25083 for (
unsigned i = 0; i != NumElems/2; ++i)
25084 ShufMask[i] = i + NumElems/2;
25094 SDValue StoredVal = Store->getValue();
25097 "Expecting 256/512-bit op");
25104 if (!Store->isSimple())
25109 std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
25111 SDValue Ptr0 = Store->getBasePtr();
25115 DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
25116 Store->getOriginalAlign(),
25117 Store->getMemOperand()->getFlags());
25119 Store->getPointerInfo().getWithOffset(HalfOffset),
25120 Store->getOriginalAlign(),
25121 Store->getMemOperand()->getFlags());
25129 SDValue StoredVal = Store->getValue();
25132 StoredVal = DAG.
getBitcast(StoreVT, StoredVal);
25137 if (!Store->isSimple())
25146 for (
unsigned i = 0; i != NumElems; ++i) {
25147 unsigned Offset = i * ScalarSize;
25153 Store->getPointerInfo().getWithOffset(
Offset),
25154 Store->getOriginalAlign(),
25155 Store->getMemOperand()->getFlags());
25171 assert(NumElts <= 8 &&
"Unexpected VT");
25174 "Expected AVX512F without AVX512DQI");
25178 DAG.
getUNDEF(MVT::v16i1), StoredVal,
25180 StoredVal = DAG.
getBitcast(MVT::i16, StoredVal);
25201 ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
25202 !Subtarget.hasBWI())) {
25215 "Unexpected type action!");
25224 MVT StVT = Subtarget.is64Bit() && StoreVT.
isInteger() ? MVT::i64 : MVT::f64;
25226 StoredVal = DAG.
getBitcast(CastVT, StoredVal);
25250 MVT RegVT =
Op.getSimpleValueType();
25251 assert(RegVT.
isVector() &&
"We only custom lower vector loads.");
25253 "We only custom lower integer vector loads.");
25263 "Expected AVX512F without AVX512DQI");
25285 Opc =
Op.getOpcode();
25289 Op.getOperand(0).hasOneUse() &&
25291 Op.getOperand(1).hasOneUse());
25302 Cond.getOperand(0).getValueType() != MVT::f128 &&
25303 !
isSoftF16(
Cond.getOperand(0).getValueType(), Subtarget)) {
25323 Overflow,
Op->getFlags());
25326 if (
LHS.getSimpleValueType().isInteger()) {
25330 EFLAGS,
Op->getFlags());
25339 if (
Op.getNode()->hasOneUse()) {
25356 CCVal, Cmp,
Op->getFlags());
25359 Cmp,
Op->getFlags());
25369 Cmp,
Op->getFlags());
25372 Cmp,
Op->getFlags());
25379 Cmp,
Op->getFlags());
25390 Overflow,
Op->getFlags());
25397 EVT CondVT =
Cond.getValueType();
25419 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
25425 SplitStack || EmitStackProbeCall;
25433 EVT VT = Node->getValueType(0);
25439 bool Is64Bit = Subtarget.is64Bit();
25446 assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
25447 " not tell us which reg is the stack pointer!");
25454 Chain = Result.getValue(1);
25460 if (Alignment && *Alignment > StackAlign)
25465 }
else if (SplitStack) {
25470 for (
const auto &
A :
F.args()) {
25471 if (
A.hasNestAttr())
25473 "have nested arguments.");
25479 Chain =
Result.getValue(1);
25511 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
25514 if (!Subtarget.is64Bit() ||
25532 Op.getOperand(0),
DL,
25540 Op.getOperand(0),
DL,
25557 Op.getOperand(0),
DL, RSFIN, FIN,
25564 assert(Subtarget.is64Bit() &&
25565 "LowerVAARG only handles 64-bit va_arg!");
25575 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
25576 unsigned Align =
Op.getConstantOperandVal(3);
25579 EVT ArgVT =
Op.getNode()->getValueType(0);
25587 assert(ArgVT != MVT::f80 &&
"va_arg for f80 not yet implemented");
25592 "Unhandled argument type in LowerVAARG");
25596 if (ArgMode == 2) {
25598 assert(!Subtarget.useSoftFloat() &&
25605 SDValue InstOps[] = {Chain, SrcPtr,
25615 Chain = VAARG.getValue(1);
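// va_copy on x86-64 copies the entire va_list structure with a memcpy rather
// than copying a single pointer.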
25625 assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
25634 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
25635 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
25639 Chain, DL, DstPtr, SrcPtr,
25674 if (VT !=
SrcOp.getSimpleValueType())
25682 if (ShiftAmt >= ElementType.getSizeInBits()) {
25684 ShiftAmt = ElementType.getSizeInBits() - 1;
25690 &&
"Unknown target vector shift-by-constant node");
25726 "Illegal vector splat index");
25729 if (ShAmtIdx != 0) {
25731 Mask[0] = ShAmtIdx;
25748 bool IsMasked =
false;
25757 AmtVT = MVT::v4i32;
25769 {ShAmt.getOperand(1), Mask}))) {
25788 }
else if (Subtarget.
hasSSE41()) {
25790 MVT::v2i64, ShAmt);
25825 assert(MaskVT.
bitsLE(Mask.getSimpleValueType()) &&
"Unexpected mask size!");
25827 if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
25828 assert(MaskVT == MVT::v64i1 &&
"Expected v64i1 mask!");
25829 assert(Subtarget.hasBWI() &&
"Expected AVX512BW target!");
25838 Mask.getSimpleValueType().getSizeInBits());
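// AVX-512 masking helpers: the unmasked result is combined with the
// pass-through (or zero) source via a vselect keyed on the k-register mask;
// the scalar variant below only looks at bit 0 of the mask.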
25854 MVT VT =
Op.getSimpleValueType();
25866 return DAG.
getNode(OpcodeSelect, dl, VT, VMask,
Op, PreservedSrc);
25881 if (
auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
25882 if (MaskConst->getZExtValue() & 0x1)
25885 MVT VT =
Op.getSimpleValueType();
25888 assert(Mask.getValueType() == MVT::i8 &&
"Unexpect type");
25905 "querying registration node size for function without personality");
25914 "can only recover FP for 32-bit MSVC EH personality functions");
25950 if (Subtarget.is64Bit())
25951 return DAG.
getNode(
ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
25958 return DAG.
getNode(
ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
25964 auto isRoundModeCurDirection = [](
SDValue Rnd) {
25965 if (
auto *
C = dyn_cast<ConstantSDNode>(Rnd))
25970 auto isRoundModeSAE = [](
SDValue Rnd) {
25971 if (
auto *
C = dyn_cast<ConstantSDNode>(Rnd)) {
25972 unsigned RC =
C->getZExtValue();
25984 auto isRoundModeSAEToX = [](
SDValue Rnd,
unsigned &RC) {
25985 if (
auto *
C = dyn_cast<ConstantSDNode>(Rnd)) {
25986 RC =
C->getZExtValue();
26001 unsigned IntNo =
Op.getConstantOperandVal(0);
26002 MVT VT =
Op.getSimpleValueType();
26009 switch(IntrData->
Type) {
26014 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26015 if (IntrWithRoundingModeOpcode != 0) {
26018 if (isRoundModeSAEToX(Rnd, RC))
26019 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26022 if (!isRoundModeCurDirection(Rnd))
26032 if (isRoundModeCurDirection(Sae))
26033 Opc = IntrData->
Opc0;
26034 else if (isRoundModeSAE(Sae))
26035 Opc = IntrData->
Opc1;
26039 return DAG.
getNode(Opc, dl,
Op.getValueType(),
Op.getOperand(1));
26047 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26048 if (IntrWithRoundingModeOpcode != 0) {
26051 if (isRoundModeSAEToX(Rnd, RC))
26052 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26053 Op.getOperand(1), Src2,
26055 if (!isRoundModeCurDirection(Rnd))
26060 Op.getOperand(1), Src2);
26066 if (isRoundModeCurDirection(Sae))
26067 Opc = IntrData->
Opc0;
26068 else if (isRoundModeSAE(Sae))
26069 Opc = IntrData->
Opc1;
26073 return DAG.
getNode(Opc, dl,
Op.getValueType(),
Op.getOperand(1),
26090 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26091 if (IntrWithRoundingModeOpcode != 0) {
26094 if (isRoundModeSAEToX(Rnd, RC))
26095 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26098 if (!isRoundModeCurDirection(Rnd))
26103 {Src1, Src2, Src3});
26113 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
26123 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26124 if (IntrWithRoundingModeOpcode != 0) {
26127 if (isRoundModeSAEToX(Rnd, RC))
26129 DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26131 Mask, PassThru, Subtarget, DAG);
26132 if (!isRoundModeCurDirection(Rnd))
26136 DAG.
getNode(IntrData->
Opc0, dl, VT, Src), Mask, PassThru,
26146 if (isRoundModeCurDirection(Rnd))
26147 Opc = IntrData->
Opc0;
26148 else if (isRoundModeSAE(Rnd))
26149 Opc = IntrData->
Opc1;
26161 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26165 bool HasRounding = IntrWithRoundingModeOpcode != 0;
26170 if (isRoundModeSAEToX(Rnd, RC))
26172 DAG.
getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
26174 Mask, passThru, Subtarget, DAG);
26175 if (!isRoundModeCurDirection(Rnd))
26180 Mask, passThru, Subtarget, DAG);
26184 "Unexpected intrinsic form");
26186 unsigned Opc = IntrData->
Opc0;
26189 if (isRoundModeSAE(Sae))
26190 Opc = IntrWithRoundingModeOpcode;
26191 else if (!isRoundModeCurDirection(Sae))
26196 Mask, passThru, Subtarget, DAG);
26207 if (isRoundModeCurDirection(Rnd))
26208 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2);
26209 else if (isRoundModeSAEToX(Rnd, RC))
26210 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2,
26224 if (isRoundModeCurDirection(Sae))
26225 Opc = IntrData->
Opc0;
26226 else if (isRoundModeSAE(Sae))
26227 Opc = IntrData->
Opc1;
26232 Mask, passThru, Subtarget, DAG);
26240 if (IntrData->
Opc1 != 0) {
26243 if (isRoundModeSAEToX(Rnd, RC))
26244 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2,
26246 else if (!isRoundModeCurDirection(Rnd))
26250 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2);
26259 unsigned Opc = IntrData->
Opc0;
26260 if (IntrData->
Opc1 != 0) {
26262 if (isRoundModeSAE(Sae))
26263 Opc = IntrData->
Opc1;
26264 else if (!isRoundModeCurDirection(Sae))
26269 Mask, PassThru, Subtarget, DAG);
26279 if (isRoundModeCurDirection(Sae))
26280 Opc = IntrData->
Opc0;
26281 else if (isRoundModeSAE(Sae))
26282 Opc = IntrData->
Opc1;
26287 Mask, PassThru, Subtarget, DAG);
26296 unsigned Opc = IntrData->
Opc0;
26297 if (IntrData->
Opc1 != 0) {
26299 if (isRoundModeSAE(Sae))
26300 Opc = IntrData->
Opc1;
26301 else if (!isRoundModeCurDirection(Sae))
26305 Mask, PassThru, Subtarget, DAG);
26316 return DAG.
getNode(IntrData->
Opc0, dl, VT, Src3, Src2, Src1);
26323 return DAG.
getNode(IntrData->
Opc0, dl, VT,Src2, Src1);
26331 MVT VT =
Op.getSimpleValueType();
26341 if (IntrData->
Opc1 != 0) {
26344 if (isRoundModeSAEToX(Rnd, RC))
26345 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2, Src3,
26347 else if (!isRoundModeCurDirection(Rnd))
26351 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2, Src3);
26361 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
26378 MVT MaskVT =
Op.getSimpleValueType();
26384 if (IntrData->
Opc1 != 0) {
26386 if (isRoundModeSAE(Sae))
26387 return DAG.
getNode(IntrData->
Opc1, dl, MaskVT,
Op.getOperand(1),
26388 Op.getOperand(2),
CC, Mask, Sae);
26389 if (!isRoundModeCurDirection(Sae))
26394 {Op.getOperand(1), Op.getOperand(2), CC, Mask});
26403 if (IntrData->
Opc1 != 0) {
26405 if (isRoundModeSAE(Sae))
26407 else if (!isRoundModeCurDirection(Sae))
26411 if (!
Cmp.getNode())
26432 bool HasAVX10_2_COMX =
26437 bool HasAVX10_2_COMX_Ty = (
LHS.getSimpleValueType() != MVT::v8bf16);
26439 auto ComiOpCode = IntrData->
Opc0;
26442 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26451 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26460 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26484 unsigned CondVal =
Op.getConstantOperandVal(3);
26488 if (isRoundModeCurDirection(Sae))
26491 else if (isRoundModeSAE(Sae))
26508 "Unexpected VSHIFT amount type");
26511 if (
auto *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
26513 Op.getSimpleValueType(),
SrcOp,
26514 CShAmt->getZExtValue(), DAG);
26518 SrcOp, ShAmt, 0, Subtarget, DAG);
26522 SDValue DataToCompress =
Op.getOperand(1);
26525 return Op.getOperand(1);
26531 return DAG.
getNode(IntrData->
Opc0, dl, VT, DataToCompress, PassThru,
26545 unsigned Opc = IntrData->
Opc0;
26546 if (IntrData->
Opc1 != 0) {
26548 if (isRoundModeSAE(Sae))
26549 Opc = IntrData->
Opc1;
26550 else if (!isRoundModeCurDirection(Sae))
26554 SDValue FixupImm = DAG.
getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
26565 uint64_t Round =
Op.getConstantOperandVal(2);
26574 uint64_t Round =
Op.getConstantOperandVal(3);
26584 Op.getValueType());
26586 Op.getOperand(1), Control);
26597 Res = DAG.
getNode(IntrData->
Opc1, dl, VTs,
Op.getOperand(2),
26602 Res = DAG.
getNode(IntrData->
Opc0, dl, VTs,
Op.getOperand(2),
26618 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), Src);
26620 MVT SrcVT = Src.getSimpleValueType();
26624 {Src, PassThru, Mask});
26633 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), {Src, Src2});
26639 {Src, Src2, PassThru, Mask});
26648 unsigned Opc = IntrData->
Opc0;
26649 bool SAE = Src.getValueType().is512BitVector() &&
26650 (isRoundModeSAEToX(Rnd, RC) || isRoundModeSAE(Rnd));
26657 return DAG.
getNode(Opc, dl,
Op.getValueType(), Src, Rnd);
26662 Opc = IntrData->
Opc1;
26663 MVT SrcVT = Src.getSimpleValueType();
26666 return DAG.
getNode(Opc, dl,
Op.getValueType(), Src, Rnd, PassThru, Mask);
26674 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), Src);
26680 return DAG.
getNode(IntrData->
Opc1, dl,
Op.getValueType(), Src, PassThru,
26694 case Intrinsic::x86_avx512_ktestc_b:
26695 case Intrinsic::x86_avx512_ktestc_w:
26696 case Intrinsic::x86_avx512_ktestc_d:
26697 case Intrinsic::x86_avx512_ktestc_q:
26698 case Intrinsic::x86_avx512_ktestz_b:
26699 case Intrinsic::x86_avx512_ktestz_w:
26700 case Intrinsic::x86_avx512_ktestz_d:
26701 case Intrinsic::x86_avx512_ktestz_q:
26702 case Intrinsic::x86_sse41_ptestz:
26703 case Intrinsic::x86_sse41_ptestc:
26704 case Intrinsic::x86_sse41_ptestnzc:
26705 case Intrinsic::x86_avx_ptestz_256:
26706 case Intrinsic::x86_avx_ptestc_256:
26707 case Intrinsic::x86_avx_ptestnzc_256:
26708 case Intrinsic::x86_avx_vtestz_ps:
26709 case Intrinsic::x86_avx_vtestc_ps:
26710 case Intrinsic::x86_avx_vtestnzc_ps:
26711 case Intrinsic::x86_avx_vtestz_pd:
26712 case Intrinsic::x86_avx_vtestc_pd:
26713 case Intrinsic::x86_avx_vtestnzc_pd:
26714 case Intrinsic::x86_avx_vtestz_ps_256:
26715 case Intrinsic::x86_avx_vtestc_ps_256:
26716 case Intrinsic::x86_avx_vtestnzc_ps_256:
26717 case Intrinsic::x86_avx_vtestz_pd_256:
26718 case Intrinsic::x86_avx_vtestc_pd_256:
26719 case Intrinsic::x86_avx_vtestnzc_pd_256: {
26724 case Intrinsic::x86_avx512_ktestc_b:
26725 case Intrinsic::x86_avx512_ktestc_w:
26726 case Intrinsic::x86_avx512_ktestc_d:
26727 case Intrinsic::x86_avx512_ktestc_q:
26732 case Intrinsic::x86_avx512_ktestz_b:
26733 case Intrinsic::x86_avx512_ktestz_w:
26734 case Intrinsic::x86_avx512_ktestz_d:
26735 case Intrinsic::x86_avx512_ktestz_q:
26739 case Intrinsic::x86_avx_vtestz_ps:
26740 case Intrinsic::x86_avx_vtestz_pd:
26741 case Intrinsic::x86_avx_vtestz_ps_256:
26742 case Intrinsic::x86_avx_vtestz_pd_256:
26745 case Intrinsic::x86_sse41_ptestz:
26746 case Intrinsic::x86_avx_ptestz_256:
26750 case Intrinsic::x86_avx_vtestc_ps:
26751 case Intrinsic::x86_avx_vtestc_pd:
26752 case Intrinsic::x86_avx_vtestc_ps_256:
26753 case Intrinsic::x86_avx_vtestc_pd_256:
26756 case Intrinsic::x86_sse41_ptestc:
26757 case Intrinsic::x86_avx_ptestc_256:
26761 case Intrinsic::x86_avx_vtestnzc_ps:
26762 case Intrinsic::x86_avx_vtestnzc_pd:
26763 case Intrinsic::x86_avx_vtestnzc_ps_256:
26764 case Intrinsic::x86_avx_vtestnzc_pd_256:
26767 case Intrinsic::x86_sse41_ptestnzc:
26768 case Intrinsic::x86_avx_ptestnzc_256:
26781 case Intrinsic::x86_sse42_pcmpistria128:
26782 case Intrinsic::x86_sse42_pcmpestria128:
26783 case Intrinsic::x86_sse42_pcmpistric128:
26784 case Intrinsic::x86_sse42_pcmpestric128:
26785 case Intrinsic::x86_sse42_pcmpistrio128:
26786 case Intrinsic::x86_sse42_pcmpestrio128:
26787 case Intrinsic::x86_sse42_pcmpistris128:
26788 case Intrinsic::x86_sse42_pcmpestris128:
26789 case Intrinsic::x86_sse42_pcmpistriz128:
26790 case Intrinsic::x86_sse42_pcmpestriz128: {
26795 case Intrinsic::x86_sse42_pcmpistria128:
26799 case Intrinsic::x86_sse42_pcmpestria128:
26803 case Intrinsic::x86_sse42_pcmpistric128:
26807 case Intrinsic::x86_sse42_pcmpestric128:
26811 case Intrinsic::x86_sse42_pcmpistrio128:
26815 case Intrinsic::x86_sse42_pcmpestrio128:
26819 case Intrinsic::x86_sse42_pcmpistris128:
26823 case Intrinsic::x86_sse42_pcmpestris128:
26827 case Intrinsic::x86_sse42_pcmpistriz128:
26831 case Intrinsic::x86_sse42_pcmpestriz128:
26843 case Intrinsic::x86_sse42_pcmpistri128:
26844 case Intrinsic::x86_sse42_pcmpestri128: {
26846 if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
26853 return DAG.
getNode(Opcode, dl, VTs, NewOps);
26856 case Intrinsic::x86_sse42_pcmpistrm128:
26857 case Intrinsic::x86_sse42_pcmpestrm128: {
26859 if (IntNo == Intrinsic::x86_sse42_pcmpistrm128)
26869 case Intrinsic::eh_sjlj_lsda: {
26874 MCSymbol *S = Context.getOrCreateSymbol(
Twine(
"GCC_except_table") +
26876 return DAG.
getNode(getGlobalWrapperKind(
nullptr, 0), dl, VT,
26880 case Intrinsic::x86_seh_lsda: {
26884 auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
26894 case Intrinsic::eh_recoverfp: {
26896 SDValue IncomingFPOp =
Op.getOperand(2);
26898 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
26901 "llvm.eh.recoverfp must take a function as the first argument");
26905 case Intrinsic::localaddress: {
26911 if (
RegInfo->hasBasePointer(MF))
26914 bool CantUseFP =
RegInfo->hasStackRealignment(MF);
26916 Reg =
RegInfo->getPtrSizedStackRegister(MF);
26918 Reg =
RegInfo->getPtrSizedFrameRegister(MF);
26922 case Intrinsic::x86_avx512_vp2intersect_q_512:
26923 case Intrinsic::x86_avx512_vp2intersect_q_256:
26924 case Intrinsic::x86_avx512_vp2intersect_q_128:
26925 case Intrinsic::x86_avx512_vp2intersect_d_512:
26926 case Intrinsic::x86_avx512_vp2intersect_d_256:
26927 case Intrinsic::x86_avx512_vp2intersect_d_128: {
26928 MVT MaskVT =
Op.getSimpleValueType();
26935 Op->getOperand(1),
Op->getOperand(2));
26943 case Intrinsic::x86_mmx_pslli_w:
26944 case Intrinsic::x86_mmx_pslli_d:
26945 case Intrinsic::x86_mmx_pslli_q:
26946 case Intrinsic::x86_mmx_psrli_w:
26947 case Intrinsic::x86_mmx_psrli_d:
26948 case Intrinsic::x86_mmx_psrli_q:
26949 case Intrinsic::x86_mmx_psrai_w:
26950 case Intrinsic::x86_mmx_psrai_d: {
26954 if (
auto *
C = dyn_cast<ConstantSDNode>(ShAmt)) {
26957 unsigned ShiftAmount =
C->getAPIntValue().getLimitedValue(255);
26958 if (ShiftAmount == 0)
26959 return Op.getOperand(1);
26962 Op.getOperand(0),
Op.getOperand(1),
26966 unsigned NewIntrinsic;
26969 case Intrinsic::x86_mmx_pslli_w:
26970 NewIntrinsic = Intrinsic::x86_mmx_psll_w;
26972 case Intrinsic::x86_mmx_pslli_d:
26973 NewIntrinsic = Intrinsic::x86_mmx_psll_d;
26975 case Intrinsic::x86_mmx_pslli_q:
26976 NewIntrinsic = Intrinsic::x86_mmx_psll_q;
26978 case Intrinsic::x86_mmx_psrli_w:
26979 NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
26981 case Intrinsic::x86_mmx_psrli_d:
26982 NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
26984 case Intrinsic::x86_mmx_psrli_q:
26985 NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
26987 case Intrinsic::x86_mmx_psrai_w:
26988 NewIntrinsic = Intrinsic::x86_mmx_psra_w;
26990 case Intrinsic::x86_mmx_psrai_d:
26991 NewIntrinsic = Intrinsic::x86_mmx_psra_d;
27002 Op.getOperand(1), ShAmt);
27004 case Intrinsic::thread_pointer: {
27015 "Target OS doesn't support __builtin_thread_pointer() yet.");
27025 auto *
C = dyn_cast<ConstantSDNode>(ScaleOp);
27032 EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
27045 SDValue Ops[] = {Chain, Src, Mask,
Base, Index, Scale };
27056 MVT VT = Op.getSimpleValueType();
27058 auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
27065 unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
27071 if (Mask.getValueType() != MaskVT)
27072 Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
27083 SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
27095 auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
27102 unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
27103 Src.getSimpleValueType().getVectorNumElements());
27108 if (Mask.getValueType() != MaskVT)
27109 Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
27114 SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
27126 auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
27136 MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
27138 SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
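// Editorial note (sketch): the gather/scatter helpers above all build their
// machine nodes from the same operand bundle -- chain, source/pass-through,
// i1 mask, base pointer, index vector and scale -- widening the mask to the
// index element count whenever the incoming mask type does not already match.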
27153 unsigned TargetOpcode,
27161 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
27167 SDValue N1Ops[] = {Chain, Glue};
27174 if (Subtarget.is64Bit()) {
27183 Chain = HI.getValue(1);
27184 Glue = HI.getValue(2);
27186 if (Subtarget.is64Bit()) {
27216 if (Opcode != X86::RDTSCP)
27245 auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
27263 auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard);
27297 if (!Subtarget.is64Bit())
27306 unsigned IntNo = Op.getConstantOperandVal(1);
27311 case Intrinsic::swift_async_context_addr: {
27317 X86FI->setHasSwiftAsyncContext(true);
27329 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
27330 if (!X86FI->getSwiftAsyncContextFrameIdx())
27331 X86FI->setSwiftAsyncContextFrameIdx(
27336 PtrSize == 8 ? MVT::i64 : MVT::i32);
27339 Op->getOperand(0));
27343 case llvm::Intrinsic::x86_seh_ehregnode:
27345 case llvm::Intrinsic::x86_seh_ehguard:
27347 case llvm::Intrinsic::x86_rdpkru: {
27354 case llvm::Intrinsic::x86_wrpkru: {
27359 Op.getOperand(0), Op.getOperand(2),
27363 case llvm::Intrinsic::asan_check_memaccess: {
27369 case llvm::Intrinsic::x86_flags_read_u32:
27370 case llvm::Intrinsic::x86_flags_read_u64:
27371 case llvm::Intrinsic::x86_flags_write_u32:
27372 case llvm::Intrinsic::x86_flags_write_u64: {
27381 case Intrinsic::x86_lwpins32:
27382 case Intrinsic::x86_lwpins64:
27383 case Intrinsic::x86_umwait:
27384 case Intrinsic::x86_tpause: {
27392 case Intrinsic::x86_umwait:
27395 case Intrinsic::x86_tpause:
27398 case Intrinsic::x86_lwpins32:
27399 case Intrinsic::x86_lwpins64:
27405 DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
27406 Op->getOperand(3), Op->getOperand(4));
27411 case Intrinsic::x86_enqcmd:
27412 case Intrinsic::x86_enqcmds: {
27419 case Intrinsic::x86_enqcmd:
27422 case Intrinsic::x86_enqcmds:
27432 case Intrinsic::x86_aesenc128kl:
27433 case Intrinsic::x86_aesdec128kl:
27434 case Intrinsic::x86_aesenc256kl:
27435 case Intrinsic::x86_aesdec256kl: {
27443 case Intrinsic::x86_aesenc128kl:
27446 case Intrinsic::x86_aesdec128kl:
27449 case Intrinsic::x86_aesenc256kl:
27452 case Intrinsic::x86_aesdec256kl:
27461 Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
27466 {ZF, Operation.getValue(0), Operation.getValue(2)});
27468 case Intrinsic::x86_aesencwide128kl:
27469 case Intrinsic::x86_aesdecwide128kl:
27470 case Intrinsic::x86_aesencwide256kl:
27471 case Intrinsic::x86_aesdecwide256kl: {
27474 {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
27475 MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
27481 case Intrinsic::x86_aesencwide128kl:
27484 case Intrinsic::x86_aesdecwide128kl:
27487 case Intrinsic::x86_aesencwide256kl:
27490 case Intrinsic::x86_aesdecwide256kl:
27500 {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
27501 Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
27502 Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
27507 {ZF, Operation.getValue(1), Operation.getValue(2),
27508 Operation.getValue(3), Operation.getValue(4),
27509 Operation.getValue(5), Operation.getValue(6),
27510 Operation.getValue(7), Operation.getValue(8),
27511 Operation.getValue(9)});
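// Editorial note (sketch): the Key Locker "wide" AES intrinsics above pass the
// handle and eight v2i64 data blocks (operands 2..10) to a single memory node
// and merge a ZF-derived success flag with result values 1..9.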
27513 case Intrinsic::x86_testui: {
27522 case Intrinsic::x86_t2rpntlvwz0rs_internal:
27523 case Intrinsic::x86_t2rpntlvwz0rst1_internal:
27524 case Intrinsic::x86_t2rpntlvwz1rs_internal:
27525 case Intrinsic::x86_t2rpntlvwz1rst1_internal:
27526 case Intrinsic::x86_t2rpntlvwz0_internal:
27527 case Intrinsic::x86_t2rpntlvwz0t1_internal:
27528 case Intrinsic::x86_t2rpntlvwz1_internal:
27529 case Intrinsic::x86_t2rpntlvwz1t1_internal: {
27532 unsigned IntNo = Op.getConstantOperandVal(1);
27537 case Intrinsic::x86_t2rpntlvwz0_internal:
27538 Opc = X86::PT2RPNTLVWZ0V;
27540 case Intrinsic::x86_t2rpntlvwz0t1_internal:
27541 Opc = X86::PT2RPNTLVWZ0T1V;
27543 case Intrinsic::x86_t2rpntlvwz1_internal:
27544 Opc = X86::PT2RPNTLVWZ1V;
27546 case Intrinsic::x86_t2rpntlvwz1t1_internal:
27547 Opc = X86::PT2RPNTLVWZ1T1V;
27549 case Intrinsic::x86_t2rpntlvwz0rs_internal:
27550 Opc = X86::PT2RPNTLVWZ0RSV;
27552 case Intrinsic::x86_t2rpntlvwz0rst1_internal:
27553 Opc = X86::PT2RPNTLVWZ0RST1V;
27555 case Intrinsic::x86_t2rpntlvwz1rs_internal:
27556 Opc = X86::PT2RPNTLVWZ1RSV;
27558 case Intrinsic::x86_t2rpntlvwz1rst1_internal:
27559 Opc = X86::PT2RPNTLVWZ1RST1V;
27583 case Intrinsic::x86_atomic_bts_rm:
27584 case Intrinsic::x86_atomic_btc_rm:
27585 case Intrinsic::x86_atomic_btr_rm: {
27587 MVT VT = Op.getSimpleValueType();
27591 unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm ? X86ISD::LBTS_RM
27597 {Chain, Op1, Op2}, VT, MMO);
27602 case Intrinsic::x86_atomic_bts:
27603 case Intrinsic::x86_atomic_btc:
27604 case Intrinsic::x86_atomic_btr: {
27606 MVT VT = Op.getSimpleValueType();
27610 unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
27617 {Chain, Op1, Op2, Size}, VT, MMO);
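// Editorial note (sketch): the x86_atomic_bts/btc/btr intrinsics lower to the
// target LBTS/LBTC/LBTR memory nodes (and their *_RM register forms above),
// i.e. a locked bit-test-and-set/complement/reset with the old bit returned
// through the flags.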
27626 case Intrinsic::x86_cmpccxadd32:
27627 case Intrinsic::x86_cmpccxadd64: {
27640 case Intrinsic::x86_aadd32:
27641 case Intrinsic::x86_aadd64:
27642 case Intrinsic::x86_aand32:
27643 case Intrinsic::x86_aand64:
27644 case Intrinsic::x86_aor32:
27645 case Intrinsic::x86_aor64:
27646 case Intrinsic::x86_axor32:
27647 case Intrinsic::x86_axor64: {
27657 case Intrinsic::x86_aadd32:
27658 case Intrinsic::x86_aadd64:
27661 case Intrinsic::x86_aand32:
27662 case Intrinsic::x86_aand64:
27665 case Intrinsic::x86_aor32:
27666 case Intrinsic::x86_aor64:
27669 case Intrinsic::x86_axor32:
27670 case Intrinsic::x86_axor64:
27676 {Chain, Op1, Op2}, VT, MMO);
27678 case Intrinsic::x86_atomic_add_cc:
27679 case Intrinsic::x86_atomic_sub_cc:
27680 case Intrinsic::x86_atomic_or_cc:
27681 case Intrinsic::x86_atomic_and_cc:
27682 case Intrinsic::x86_atomic_xor_cc: {
27693 case Intrinsic::x86_atomic_add_cc:
27696 case Intrinsic::x86_atomic_sub_cc:
27699 case Intrinsic::x86_atomic_or_cc:
27702 case Intrinsic::x86_atomic_and_cc:
27705 case Intrinsic::x86_atomic_xor_cc:
27712 {Chain, Op1, Op2}, VT, MMO);
27721 switch(IntrData->Type) {
27734 SDValue(Result.getNode(), 1)};
27739 SDValue(Result.getNode(), 2));
27749 Scale, Chain, Subtarget);
27771 Scale, Chain, Subtarget);
27774 const APInt &HintVal = Op.getConstantOperandAPInt(6);
27775 assert((HintVal == 2 || HintVal == 3) &&
27776 "Wrong prefetch hint in intrinsic: should be 2 or 3");
27777 unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
27823 SDValue DataToTruncate = Op.getOperand(3);
27828 assert(MemIntr && "Expected MemIntrinsicSDNode!");
27833 switch (TruncationOp) {
27877 unsigned Depth = Op.getConstantOperandVal(0);
27882 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
27907 EVT VT = Op.getValueType();
27915 int FrameAddrIndex = FuncInfo->getFAIndex();
27916 if (!FrameAddrIndex) {
27918 unsigned SlotSize = RegInfo->getSlotSize();
27920 SlotSize, 0, false);
27926 unsigned FrameReg =
27929 unsigned Depth = Op.getConstantOperandVal(0);
27930 assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
27931 (FrameReg == X86::EBP && VT == MVT::i32)) &&
27932 "Invalid Frame Register!");
27947 .Case("esp", X86::ESP)
27948 .Case("rsp", X86::RSP)
27949 .Case("ebp", X86::EBP)
27950 .Case("rbp", X86::RBP)
27951 .Case("r14", X86::R14)
27952 .Case("r15", X86::R15)
27955 if (Reg == X86::EBP || Reg == X86::RBP) {
27956 if (!TFI.hasFP(MF))
27958 " is allocatable: function has no frame pointer");
27963 assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
27964 "Invalid Frame Register!");
27982 const Constant *PersonalityFn) const {
27990 const Constant *PersonalityFn) const {
27993 return X86::NoRegister;
28010 assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
28011 (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
28012 "Invalid Frame Register!");
28014 Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
28021 Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
28036 if (!Subtarget.is64Bit()) {
28042 Op.getOperand(0), Op.getOperand(1));
28049 Op.getOperand(0), Op.getOperand(1));
28060 return Op.getOperand(0);
28071 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
28074 if (Subtarget.is64Bit()) {
28078 const unsigned char JMP64r = 0xFF;
28079 const unsigned char MOV64ri = 0xB8;
28081 const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
28082 const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
28084 const unsigned char REX_WB = 0x40 | 0x08 | 0x01;
28087 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB;
28099 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB;
28111 OpCode = (JMP64r << 8) | REX_WB;
28117 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6);
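// Editorial note (sketch): the 64-bit trampoline emitted above is a tiny stub of
// roughly this shape:
//   movabs r11, <nested function>   ; REX.WB + (0xB8 | r11), imm64
//   movabs r10, <nest argument>     ; REX.WB + (0xB8 | r10), imm64
//   jmp    *r11                     ; REX.WB + 0xFF, ModRM with mod=11, /4, rm=r11
// which is why the code composes MOV64ri/JMP64r opcode bytes with the low three
// encoding bits of R10/R11 (N86R10/N86R11) and a REX_WB prefix.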
28126 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
28137 NestReg = X86::ECX;
28143 if (!Attrs.isEmpty() && !Func->isVarArg()) {
28144 unsigned InRegCount = 0;
28148 E = FTy->param_end(); I != E; ++I, ++Idx)
28149 if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
28152 InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
28155 if (InRegCount > 2) {
28169 NestReg = X86::EAX;
28181 const unsigned char MOV32ri = 0xB8;
28182 const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
28192 const unsigned char JMP = 0xE9;
28233 MVT VT = Op.getSimpleValueType();
28244 SDValue Ops[] = {Chain, StackSlot};
28246 DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
28276 SDValue Chain = Op.getNode()->getOperand(0);
28288 SDValue Ops[] = {Chain, StackSlot};
28299 SDValue NewRM = Op.getNode()->getOperand(1);
28301 if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
28346 SDValue OpsLD[] = {Chain, StackSlot};
28396 auto *Node = cast<FPStateAccessSDNode>(Op);
28397 EVT MemVT = Node->getMemoryVT();
28402 if (Subtarget.hasX87()) {
28405 {Chain, Ptr}, MemVT, MMO);
28414 {Chain, Ptr}, MemVT, MMO);
28438 if (Subtarget.hasX87())
28441 {Chain, Ptr}, MemVT, MMO);
28462 auto *Node = cast<FPStateAccessSDNode>(Op);
28463 EVT MemVT = Node->getMemoryVT();
28473 SDValue Chain = Op.getNode()->getOperand(0);
28483 FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
28485 for (unsigned I = 0; I < 6; ++I)
28490 FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
28504 assert((Amt < 8) && "Shift/Rotation amount out of range");
28507 return 0x8040201008040201ULL;
28509 return ((0x0102040810204080ULL >> (Amt)) &
28510 (0x0101010101010101ULL * (0xFF >> (Amt))));
28512 return ((0x0102040810204080ULL << (Amt)) &
28513 (0x0101010101010101ULL * ((0xFF << (Amt)) & 0xFF)));
28516 (0x8080808080808080ULL >> (64 - (8 * Amt))));
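// Editorial note (sketch): the 64-bit constants above are GF(2) 8x8 bit matrices
// intended for GF2P8AFFINEQB. 0x8040201008040201 is the identity matrix, and the
// shifted/masked variants encode per-byte shifts and rotates by Amt when applied
// as an affine transform.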
28527 MVT VT, unsigned Amt = 0) {
28529 (VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");
28533 uint64_t Bits = (Imm >> (I % 64)) & 255;
28549 MVT VT = Op.getSimpleValueType();
28553 assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
28554 "Unsupported element type");
28557 if (NumElems > 16 ||
28563 "Unsupported value type for operation");
28578 MVT VT = Op.getSimpleValueType();
28584 const int LUT[16] = { 4, 3, 2, 2,
28590 for (int i = 0; i < NumBytes; ++i)
28606 if (CurrVT.is512BitVector()) {
28624 while (CurrVT != VT) {
28625 int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
28626 int CurrNumElts = CurrVT.getVectorNumElements();
28632 if (CurrVT.is512BitVector()) {
28660 MVT VT = Op.getSimpleValueType();
28662 if (Subtarget.hasCDI() &&
28675 assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
28681 MVT VT = Op.getSimpleValueType();
28685 unsigned Opc = Op.getOpcode();
28690 Op = Op.getOperand(0);
28691 if (VT == MVT::i8) {
28720 MVT VT = Op.getSimpleValueType();
28726 "Only scalar CTTZ requires custom lowering");
28745 MVT VT = Op.getSimpleValueType();
28748 if (VT == MVT::i16 || VT == MVT::i32)
28751 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28754 assert(Op.getSimpleValueType().is256BitVector() &&
28755 Op.getSimpleValueType().isInteger() &&
28756 "Only handle AVX 256-bit vector integer operation");
28762 MVT VT = Op.getSimpleValueType();
28764 unsigned Opcode = Op.getOpcode();
28767 if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
28769 assert(Op.getSimpleValueType().isInteger() &&
28770 "Only handle AVX vector integer operation");
28776 EVT SetCCResultType =
28788 if (
C &&
C->getAPIntValue().isSignMask()) {
28801 if (SetCCResultType == VT &&
28809 (!VT.
isVector() || VT == MVT::v2i64)) {
28816 SDValue SumDiff = Result.getValue(0);
28817 SDValue Overflow = Result.getValue(1);
28822 Result = DAG.
getSelect(
DL, VT, SumNeg, SatMax, SatMin);
28823 return DAG.
getSelect(
DL, VT, Overflow, Result, SumDiff);
28832 MVT VT =
Op.getSimpleValueType();
28835 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
28847 if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.
hasSSE41()) {
28855 "Only handle AVX 256-bit vector integer operation");
28859 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
28868 MVT VT =
Op.getSimpleValueType();
28875 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28884 MVT VT =
Op.getSimpleValueType();
28891 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28901 EVT VT =
Op.getValueType();
28909 if (Subtarget.hasAVX10_2() && TLI.
isTypeLegal(VT)) {
28913 else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
28919 return DAG.
getNode(Opc,
DL, VT,
X,
Y, Imm,
Op->getFlags());
28925 APInt OppositeZero = PreferredZero;
28957 if (
auto *CstOp = dyn_cast<ConstantFPSDNode>(
Op))
28958 return CstOp->getValueAPF().bitcastToAPInt() == Zero;
28959 if (
auto *CstOp = dyn_cast<ConstantSDNode>(
Op))
28960 return CstOp->getAPIntValue() == Zero;
28963 for (
const SDValue &OpVal :
Op->op_values()) {
28964 if (OpVal.isUndef())
28966 auto *CstOp = dyn_cast<ConstantFPSDNode>(OpVal);
28969 if (!CstOp->getValueAPF().isZero())
28971 if (CstOp->getValueAPF().bitcastToAPInt() != Zero)
28982 Op->getFlags().hasNoSignedZeros() ||
28986 if (IgnoreSignedZero || MatchesZero(
Y, PreferredZero) ||
28987 MatchesZero(
X, OppositeZero)) {
28991 }
else if (MatchesZero(
X, PreferredZero) || MatchesZero(
Y, OppositeZero)) {
28994 }
else if (!VT.
isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) &&
28995 (
Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {
29014 return DAG.
getNode(MinMaxOp,
DL, VT, NewX, NewY,
Op->getFlags());
29017 if (Subtarget.is64Bit() || VT != MVT::f64) {
29045 Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN);
29066 MVT VT =
Op.getSimpleValueType();
29073 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.
useBWIRegs())
29085 if (VT.
bitsGE(MVT::i32)) {
29120 MVT VT =
Op.getSimpleValueType();
29126 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
29134 if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
29137 unsigned NumEltsPerLane = NumElts / NumLanes;
29139 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29154 bool BIsBuildVector = isa<BuildVectorSDNode>(
B);
29155 bool IsLoLaneAllZeroOrUndef = BIsBuildVector;
29156 bool IsHiLaneAllZeroOrUndef = BIsBuildVector;
29157 if (BIsBuildVector) {
29159 if ((
Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
29165 if (!(IsLoLaneAllZeroOrUndef || IsHiLaneAllZeroOrUndef)) {
29190 for (
unsigned i = 0; i != NumElts; i += 16) {
29191 for (
unsigned j = 0; j != 8; ++j) {
29209 return getPack(DAG, Subtarget, dl, VT, RLo, RHi);
29213 if (VT == MVT::v4i32) {
29215 "Should not custom lower when pmulld is available!");
29218 static const int UnpackMask[] = { 1, -1, 3, -1 };
29236 static const int ShufMask[] = { 0, 4, 2, 6 };
29240 assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
29241 "Only know how to lower V2I64/V4I64/V8I64 multiply");
29242 assert(!Subtarget.hasDQI() &&
"DQI should use MULLQ");
29268 if (!ALoIsZero && !BLoIsZero)
29272 if (!ALoIsZero && !BHiIsZero) {
29278 if (!AHiIsZero && !BLoIsZero) {
29290 MVT VT,
bool IsSigned,
29324 for (
unsigned i = 0; i != NumElts; i += 16) {
29325 for (
unsigned j = 0; j != 8; ++j) {
29326 SDValue LoOp =
B.getOperand(i + j);
29327 SDValue HiOp =
B.getOperand(i + j + 8);
29348 }
else if (IsSigned) {
29363 *
Low =
getPack(DAG, Subtarget, dl, VT, RLo, RHi);
29365 return getPack(DAG, Subtarget, dl, VT, RLo, RHi,
true);
29371 MVT VT =
Op.getSimpleValueType();
29381 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
29384 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
29386 (VT == MVT::v8i32 && Subtarget.
hasInt256()) ||
29387 (VT == MVT::v16i32 && Subtarget.
hasAVX512()));
29401 const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
29402 9, -1, 11, -1, 13, -1, 15, -1};
29428 for (
int i = 0; i != (int)NumElts; ++i)
29429 ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
29435 if (IsSigned && !Subtarget.
hasSSE41()) {
29450 assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
29451 (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
29452 "Unsupported vector type");
29460 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29477 MVT VT =
Op.getSimpleValueType();
29487 EVT OvfVT =
Op->getValueType(1);
29489 if ((VT == MVT::v32i8 && !Subtarget.
hasInt256()) ||
29490 (VT == MVT::v64i8 && !Subtarget.hasBWI())) {
29499 EVT LoOvfVT, HiOvfVT;
29520 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29545 if (!Subtarget.hasBWI()) {
29566 if (!Subtarget.hasBWI()) {
29608 EVT VT =
Op.getValueType();
29610 "Unexpected return type for lowering");
29612 if (isa<ConstantSDNode>(
Op->getOperand(1))) {
29620 switch (
Op->getOpcode()) {
29636 EVT ArgVT =
Op->getOperand(i).getValueType();
29638 "Unexpected argument type for lowering");
29640 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
29645 DAG.
getStore(InChain, dl,
Op->getOperand(i), StackPtr, MPI,
Align(16));
29648 Entry.IsSExt =
false;
29649 Entry.IsZExt =
false;
29650 Args.push_back(Entry);
29657 CLI.setDebugLoc(dl)
29661 static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.
getContext()), Callee,
29675 EVT VT =
Op.getValueType();
29676 bool IsStrict =
Op->isStrictFPOpcode();
29678 SDValue Arg =
Op.getOperand(IsStrict ? 1 : 0);
29682 "Unexpected return type for lowering");
29690 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected request for libcall!");
29693 MakeLibCallOptions CallOptions;
29699 std::tie(Result, Chain) =
29700 makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
29708 EVT VT =
Op.getValueType();
29709 bool IsStrict =
Op->isStrictFPOpcode();
29711 SDValue Arg =
Op.getOperand(IsStrict ? 1 : 0);
29715 "Unexpected argument type for lowering");
29723 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected request for libcall!");
29726 MakeLibCallOptions CallOptions;
29731 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
29734 Chain = DAG.
getStore(Chain, dl, Arg, StackPtr, MPI,
Align(16));
29737 std::tie(Result, Chain) =
29738 makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
29747 "Unexpected shift opcode");
29765 bool AShift = LShift && (Subtarget.
hasAVX512() ||
29766 (VT != MVT::v2i64 && VT != MVT::v4i64));
29767 return (Opcode ==
ISD::SRA) ? AShift : LShift;
29783 "Unexpected shift opcode");
29803 bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
29804 return (Opcode ==
ISD::SRA) ? AShift : LShift;
29809 MVT VT = Op.getSimpleValueType();
29816 auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
29817 assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
29822 if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
29824 "Unsupported PCMPGT op");
29828 if (ShiftAmt >= 32) {
29833 ShiftAmt - 32, DAG);
29834 if (VT == MVT::v2i64)
29836 if (VT == MVT::v4i64)
29838 {9, 1, 11, 3, 13, 5, 15, 7});
29846 if (VT == MVT::v2i64)
29848 if (VT == MVT::v4i64)
29850 {8, 1, 10, 3, 12, 5, 14, 7});
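// Editorial note (sketch): ArithmeticShiftRight64 emulates a 64-bit arithmetic
// right shift without native support: a shift by 63 becomes a PCMPGT that
// broadcasts the sign bit, and other amounts combine a 32-bit SRA of the high
// halves with a logical shift, re-interleaving the halves via the shuffle masks
// above.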
29856 APInt APIntShiftAmt;
29861 if (APIntShiftAmt.
uge(EltSizeInBits))
29870 if (
Op.getOpcode() ==
ISD::SHL && ShiftAmt == 1) {
29884 if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
29885 (Subtarget.
hasInt256() && VT == MVT::v4i64)) &&
29887 return ArithmeticShiftRight64(ShiftAmt);
29894 Mask = DAG.
getNode(
Op.getOpcode(), dl, VT, Mask, Amt);
29898 if (VT == MVT::v16i8 || (Subtarget.
hasInt256() && VT == MVT::v32i8) ||
29899 (Subtarget.hasBWI() && VT == MVT::v64i8)) {
29904 if (
Op.getOpcode() ==
ISD::SHL && ShiftAmt == 1) {
29915 if (
Op.getOpcode() ==
ISD::SRA && ShiftAmt == 7) {
29918 assert(VT == MVT::v64i8 &&
"Unexpected element type!");
29926 if (VT == MVT::v16i8 && Subtarget.hasXOP())
29929 if (Subtarget.hasGFNI()) {
29970 MVT VT =
Op.getSimpleValueType();
29974 unsigned Opcode =
Op.getOpcode();
29977 int BaseShAmtIdx = -1;
29986 VT == MVT::v64i8) &&
29987 !Subtarget.hasXOP()) {
29998 BaseShAmt, BaseShAmtIdx, Subtarget, DAG);
30008 BaseShAmtIdx, Subtarget, DAG);
30018 BaseShAmtIdx, Subtarget, DAG);
30036 if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
30037 (Subtarget.
hasInt256() && VT == MVT::v16i16) ||
30038 (Subtarget.
hasAVX512() && VT == MVT::v32i16) ||
30039 (!Subtarget.
hasAVX512() && VT == MVT::v16i8) ||
30040 (Subtarget.
hasInt256() && VT == MVT::v32i8) ||
30041 (Subtarget.hasBWI() && VT == MVT::v64i8)))
30051 APInt One(SVTBits, 1);
30053 for (
unsigned I = 0;
I != NumElems; ++
I) {
30054 if (UndefElts[
I] || EltBits[
I].uge(SVTBits))
30056 uint64_t ShAmt = EltBits[
I].getZExtValue();
30064 if (VT == MVT::v4i32) {
30073 if (VT == MVT::v8i16 && !Subtarget.
hasAVX2()) {
30089 MVT VT =
Op.getSimpleValueType();
30097 unsigned Opc =
Op.getOpcode();
30101 assert(VT.
isVector() &&
"Custom lowering only for vector shifts!");
30102 assert(Subtarget.
hasSSE2() &&
"Only custom lower when we have SSE2!");
30116 if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||
30117 (VT == MVT::v4i64 && Subtarget.
hasInt256())) &&
30129 if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
30130 VT == MVT::v8i16 || VT == MVT::v16i8)) {
30141 if (VT == MVT::v2i64 && Opc !=
ISD::SRA) {
30153 for (
unsigned I = 0;
I != NumElts; ++
I) {
30155 if (
A.isUndef() ||
A->getAsAPIntVal().uge(EltSizeInBits))
30157 unsigned CstAmt =
A->getAsAPIntVal().getZExtValue();
30158 if (UniqueCstAmt.
count(CstAmt)) {
30159 UniqueCstAmt[CstAmt].setBit(
I);
30164 assert(!UniqueCstAmt.
empty() &&
"Illegal constant shift amounts");
30177 if (UniqueCstAmt.
size() == 2 &&
30178 (VT == MVT::v8i16 || VT == MVT::v4i32 ||
30179 (VT == MVT::v16i16 && Subtarget.
hasInt256()))) {
30180 unsigned AmtA = UniqueCstAmt.
begin()->first;
30181 unsigned AmtB = std::next(UniqueCstAmt.
begin())->first;
30182 const APInt &MaskA = UniqueCstAmt.
begin()->second;
30183 const APInt &MaskB = std::next(UniqueCstAmt.
begin())->second;
30185 for (
unsigned I = 0;
I != NumElts; ++
I) {
30187 ShuffleMask[
I] =
I;
30189 ShuffleMask[
I] =
I + NumElts;
30193 if ((VT != MVT::v16i16 ||
30208 (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
30209 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16) &&
30210 !Subtarget.hasXOP()) {
30220 int WideEltSizeInBits = EltSizeInBits;
30221 while (WideEltSizeInBits < 32) {
30224 if (WideEltSizeInBits >= 16 && !Subtarget.
hasAVX2()) {
30227 TmpAmtWideElts.
resize(AmtWideElts.
size() / 2);
30228 bool SameShifts =
true;
30229 for (
unsigned SrcI = 0, E = AmtWideElts.
size(); SrcI != E; SrcI += 2) {
30230 unsigned DstI = SrcI / 2;
30232 if (AmtWideElts[SrcI].
isUndef() && AmtWideElts[SrcI + 1].
isUndef()) {
30233 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30238 if (AmtWideElts[SrcI].
isUndef()) {
30239 TmpAmtWideElts[DstI] = AmtWideElts[SrcI + 1];
30244 if (AmtWideElts[SrcI + 1].
isUndef()) {
30245 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30249 if (AmtWideElts[SrcI].
getNode()->getAsAPIntVal() ==
30250 AmtWideElts[SrcI + 1].
getNode()->getAsAPIntVal()) {
30251 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30256 SameShifts =
false;
30262 WideEltSizeInBits *= 2;
30263 std::swap(TmpAmtWideElts, AmtWideElts);
30265 APInt APIntShiftAmt;
30269 if (WideEltSizeInBits * AmtWideElts.
size() >= 512 &&
30270 WideEltSizeInBits < 32 && !Subtarget.hasBWI()) {
30271 Profitable =
false;
30275 if (WideEltSizeInBits * AmtWideElts.
size() >= 512 && IsConstantSplat) {
30276 Profitable =
false;
30281 if (EltSizeInBits == 8 && Subtarget.hasGFNI()) {
30282 Profitable =
false;
30288 if (WideEltSizeInBits * AmtWideElts.
size() >= 256 &&
30289 (WideEltSizeInBits < 32 || IsConstantSplat) && !Subtarget.
hasAVX2()) {
30290 Profitable =
false;
30292 unsigned WideNumElts = AmtWideElts.
size();
30294 if (Profitable && WideNumElts != NumElts) {
30305 SDValue ShiftedR = DAG.getNode(LogicalOpc, dl, WideVT, RWide, AmtWide);
30317 SDValue Mask = DAG.getNode(LogicalOpc, dl, VT, SplatFullMask, Amt);
30352 SDValue SignBitMask = DAG.getNode(LogicalOpc, dl, VT, SplatHighBit, Amt);
30357 return Subtraction;
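// Editorial note (sketch): narrow per-element shifts are widened and performed
// with a logical shift; SRL then masks away the bits that leaked in from the
// neighbouring element, while SRA additionally fixes the sign with the shifted
// sign-bit mask computed above before the final subtraction is returned.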
30364 if (Opc ==
ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||
30371 if (Opc ==
ISD::SRL && ConstantAmt &&
30372 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.
hasInt256()))) {
30379 return DAG.
getSelect(dl, VT, ZAmt, R, Res);
30387 if (Opc ==
ISD::SRA && ConstantAmt &&
30388 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.
hasInt256())) &&
30389 ((Subtarget.
hasSSE41() && !Subtarget.hasXOP() &&
30402 Res = DAG.
getSelect(dl, VT, Amt0, R, Res);
30403 return DAG.
getSelect(dl, VT, Amt1, Sra1, Res);
30412 if (VT == MVT::v4i32) {
30413 SDValue Amt0, Amt1, Amt2, Amt3;
30424 if (Subtarget.
hasAVX()) {
30433 {4, 5, 6, 7, -1, -1, -1, -1});
30443 unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
30463 if ((VT == MVT::v16i8 && Subtarget.
hasSSSE3()) ||
30464 (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
30465 (VT == MVT::v64i8 && Subtarget.hasBWI())) {
30467 unsigned NumEltsPerLane = NumElts / NumLanes;
30469 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
30470 unsigned LoElt = Lane * NumEltsPerLane;
30476 for (
unsigned I = 0;
I != 8; ++
I) {
30478 LUT.push_back(LaneSplat.
shl(
I));
30486 if (
LUT.size() == NumElts) {
30497 if ((Subtarget.
hasInt256() && VT == MVT::v8i16) ||
30501 (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) {
30502 assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&
30503 "Unexpected vector type");
30504 MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
30507 R = DAG.
getNode(ExtOpc, dl, ExtVT, R);
30510 DAG.
getNode(Opc, dl, ExtVT, R, Amt));
30516 (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
30517 (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
30518 !Subtarget.hasXOP()) {
30529 "Constant build vector expected");
30531 if (VT == MVT::v16i8 && Subtarget.
hasInt256()) {
30540 for (
unsigned i = 0; i != NumElts; i += 16) {
30541 for (
int j = 0; j != 8; ++j) {
30552 LoR = DAG.
getNode(X86OpcI, dl, VT16, LoR, Cst8);
30553 HiR = DAG.
getNode(X86OpcI, dl, VT16, HiR, Cst8);
30561 if (VT == MVT::v16i8 ||
30562 (VT == MVT::v32i8 && Subtarget.
hasInt256() && !Subtarget.hasXOP()) ||
30563 (VT == MVT::v64i8 && Subtarget.hasBWI())) {
30578 }
else if (Subtarget.
hasSSE41()) {
30605 R = SignBitSelect(VT, Amt, M, R);
30612 R = SignBitSelect(VT, Amt, M, R);
30619 R = SignBitSelect(VT, Amt, M, R);
30639 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30640 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30649 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30650 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30659 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30660 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30670 if (Subtarget.
hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
30671 MVT ExtVT = MVT::v8i32;
30688 if (VT == MVT::v8i16) {
30691 bool UseSSE41 = Subtarget.
hasSSE41() &&
30727 R = SignBitSelect(Amt, M, R);
30734 R = SignBitSelect(Amt, M, R);
30741 R = SignBitSelect(Amt, M, R);
30748 R = SignBitSelect(Amt, M, R);
30756 if (VT == MVT::v32i16 || VT == MVT::v64i8)
30764 MVT VT =
Op.getSimpleValueType();
30766 "Unexpected funnel shift opcode!");
30776 APInt APIntShiftAmt;
30780 if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
30785 uint64_t ShiftAmt = APIntShiftAmt.
urem(EltSizeInBits);
30788 {Op0, Op1, Imm}, DAG, Subtarget);
30791 {Op0, Op1, Amt}, DAG, Subtarget);
30793 assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
30794 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
30795 VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
30796 "Unexpected funnel shift type!");
30804 uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
30805 uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt;
30806 uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt);
30807 assert((ShXAmt + ShYAmt) == EltSizeInBits && "Illegal funnel shift");
30810 if (EltSizeInBits == 8 &&
30811 (Subtarget.hasXOP() ||
30844 if (IsCst && EltSizeInBits == 16)
30853 if ((VT.
is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) ||
30856 EltSizeInBits < 32)) {
30858 Op = DAG.
getNode(
Op.getOpcode(),
DL, VT, Op0, Op1, AmtMod);
30864 int ScalarAmtIdx = -1;
30867 if (EltSizeInBits == 16)
30873 ScalarAmtIdx, Subtarget, DAG);
30875 ScalarAmtIdx, Subtarget, DAG);
30881 std::min<unsigned>(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32));
30897 EltSizeInBits, DAG);
30899 Res = DAG.
getNode(ShiftOpc,
DL, WideVT, Res, AmtMod);
30902 EltSizeInBits, DAG);
30907 if (((IsCst || !Subtarget.
hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) ||
30923 (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
30924 "Unexpected funnel shift type!");
30928 bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
30932 if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
30933 !isa<ConstantSDNode>(Amt)) {
30950 if (VT == MVT::i8 || ExpandFunnel)
30954 if (VT == MVT::i16) {
30958 return DAG.
getNode(FSHOp,
DL, VT, Op0, Op1, Amt);
30966 MVT VT =
Op.getSimpleValueType();
30967 assert(VT.
isVector() &&
"Custom lowering only for vector rotates!");
30972 unsigned Opcode =
Op.getOpcode();
30978 APInt CstSplatValue;
30982 if (IsCstSplat && CstSplatValue.
urem(EltSizeInBits) == 0)
30986 if ((Subtarget.hasVLX() ||
30987 (Subtarget.
hasAVX512() && Subtarget.hasEVEX512())) &&
30988 32 <= EltSizeInBits) {
31002 if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
31004 return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
31016 if (Subtarget.hasXOP())
31022 if (IsCstSplat && Subtarget.hasGFNI() && VT.getScalarType() == MVT::i8 &&
31037 if (Subtarget.hasXOP()) {
31038 assert(IsROTL && "Only ROTL expected");
31057 uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
31058 uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
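// Editorial note (sketch): without a native rotate, a constant-splat rotate is
// expanded as (X << ShlAmt) | (X >> SrlAmt); with VBMI2 the same rotate is just
// a funnel shift of the value with itself, as returned a few lines above.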
31071 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
31072 ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
31074 ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.
useBWIRegs())) &&
31075 "Only vXi32/vXi16/vXi8 vector rotates supported");
31086 if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
31087 int BaseRotAmtIdx = -1;
31089 if (EltSizeInBits == 16 && Subtarget.
hasSSE41()) {
31091 return DAG.
getNode(FunnelOpc,
DL, VT, R, R, Amt);
31097 BaseRotAmtIdx, Subtarget, DAG);
31099 BaseRotAmtIdx, Subtarget, DAG);
31111 if (!(ConstantAmt && EltSizeInBits != 8) &&
31126 if (EltSizeInBits == 8) {
31144 R = DAG.
getNode(ShiftOpc,
DL, WideVT, R, Amt);
31191 R = SignBitSelect(VT, Amt, M, R);
31201 R = SignBitSelect(VT, Amt, M, R);
31211 return SignBitSelect(VT, Amt, M, R);
31220 if (IsSplatAmt || LegalVarShifts || (Subtarget.
hasAVX2() && !ConstantAmt)) {
31238 assert(IsROTL &&
"Only ROTL supported");
31247 if (EltSizeInBits == 16) {
31256 assert(VT == MVT::v4i32 &&
"Only v4i32 vector rotate expected");
31257 static const int OddMask[] = {1, -1, 3, -1};
31279bool X86TargetLowering::needsCmpXchgNb(
Type *MemType)
const {
31284 if (OpWidth == 128)
31291X86TargetLowering::shouldExpandAtomicStoreInIR(
StoreInst *SI)
const {
31292 Type *MemType =
SI->getValueOperand()->getType();
31294 if (!
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
31295 !Subtarget.useSoftFloat()) {
31297 (Subtarget.
hasSSE1() || Subtarget.hasX87()))
31311X86TargetLowering::shouldExpandAtomicLoadInIR(
LoadInst *LI)
const {
31315 !Subtarget.useSoftFloat()) {
31320 (Subtarget.
hasSSE1() || Subtarget.hasX87()))
31344 if (
auto *
C = dyn_cast<ConstantInt>(V)) {
31354 if (
auto *
I = dyn_cast<Instruction>(V)) {
31361 I = dyn_cast<Instruction>(PeekI);
31371 if (
I->getOpcode() == Instruction::Shl) {
31380 auto *ShiftVal = dyn_cast<ConstantInt>(
I->getOperand(0));
31383 if (ShiftVal->equalsInt(1))
31389 Value *BitV =
I->getOperand(1);
31393 uint64_t ShiftMask =
I->getType()->getPrimitiveSizeInBits() - 1;
31397 return {BitV, BTK};
31404X86TargetLowering::shouldExpandLogicAtomicRMWInIR(
AtomicRMWInst *AI)
const {
31427 I->getOpcode() != Instruction::And ||
31432 unsigned OtherIdx =
I->getOperand(0) == AI ? 1 : 0;
31435 if (AI ==
I->getOperand(OtherIdx))
31441 auto *C2 = dyn_cast<ConstantInt>(
I->getOperand(OtherIdx));
31446 return ~C1->getValue() == C2->getValue()
31460 assert(BitChange.first !=
nullptr && BitTested.first !=
nullptr);
31463 if (BitChange.first != BitTested.first)
31479void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(
AtomicRMWInst *AI)
const {
31481 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31488 IID_C = Intrinsic::x86_atomic_bts;
31489 IID_I = Intrinsic::x86_atomic_bts_rm;
31492 IID_C = Intrinsic::x86_atomic_btc;
31493 IID_I = Intrinsic::x86_atomic_btc_rm;
31496 IID_C = Intrinsic::x86_atomic_btr;
31497 IID_I = Intrinsic::x86_atomic_btr_rm;
31506 assert(BitTested.first != nullptr);
31509 auto *C = cast<ConstantInt>(I->getOperand(I->getOperand(0) == AI ? 1 : 0));
31513 {Addr, Builder.getInt8(Imm)});
31522 unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();
31524 Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
31531 Result = Builder.CreateIntrinsic(IID_I, AI->getType(), {Addr, BitPos});
31536 for (auto It = I->user_begin(); It != I->user_end(); ++It) {
31537 if (auto *ICmp = dyn_cast<ICmpInst>(*It)) {
31538 if (ICmp->isEquality()) {
31539 auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
31540 auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
31542 assert(C0 == nullptr || C1 == nullptr);
31543 if ((C0 ? C0 : C1)->isZero())
31548 Result = Builder.CreateShl(Result, BitPos);
31553 I->replaceAllUsesWith(Result);
31554 I->eraseFromParent();
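// Editorial note (sketch): emitBitTestAtomicRMWIntrinsic rewrites an atomicrmw
// or/xor/and whose operand is a single (possibly shifted) bit into the
// x86_atomic_bts/btc/btr intrinsics selected above, using the constant form
// when the bit index is a ConstantInt and the register form otherwise; the
// returned bit is shifted back into position unless its only use is an
// equality compare against zero.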
31571 if (
match(
I->user_back(),
31574 if (
match(
I->user_back(),
31584 if (
match(
I->user_back(),
31587 if (
match(
I->user_back(),
31600 if (
match(
I->user_back(),
31609 if (
match(
I->user_back(),
31612 if (
match(
I->user_back(),
31622void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
31625 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31632 ICI = cast<ICmpInst>(TempI->
user_back());
31657 IID = Intrinsic::x86_atomic_add_cc;
31660 IID = Intrinsic::x86_atomic_sub_cc;
31663 IID = Intrinsic::x86_atomic_or_cc;
31666 IID = Intrinsic::x86_atomic_and_cc;
31669 IID = Intrinsic::x86_atomic_xor_cc;
31676 {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)});
31686X86TargetLowering::shouldExpandAtomicRMWInIR(
AtomicRMWInst *AI)
const {
31687 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
31712 return shouldExpandLogicAtomicRMWInIR(AI);
31734X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(
AtomicRMWInst *AI)
const {
31735 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
31752 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31753 Module *
M = Builder.GetInsertBlock()->getParent()->getParent();
31789 Builder.CreateCall(MFence, {});
31792 LoadInst *Loaded = Builder.CreateAlignedLoad(
31835 if (Subtarget.is64Bit()) {
31890 MVT T = Op.getSimpleValueType();
31894 switch(T.SimpleTy) {
31896 case MVT::i8: Reg = X86::AL; size = 1; break;
31897 case MVT::i16: Reg = X86::AX; size = 2; break;
31898 case MVT::i32: Reg = X86::EAX; size = 4; break;
31900 assert(Subtarget.is64Bit() && "Node not type legal!");
31901 Reg = X86::RAX; size = 8;
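// Editorial note (sketch): CMPXCHG lowering pins the expected value in the
// accumulator register matching the operation width (AL/AX/EAX, or RAX only in
// 64-bit mode), mirroring the hardware cmpxchg semantics.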
31929 MVT InVT = V.getSimpleValueType();
31931 if (InVT == MVT::v64i8) {
31942 if (InVT == MVT::v32i8 && !Subtarget.
hasInt256()) {
31958 MVT SrcVT = Src.getSimpleValueType();
31959 MVT DstVT =
Op.getSimpleValueType();
31963 if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
31964 assert(!Subtarget.is64Bit() &&
"Expected 32-bit mode");
31965 assert(Subtarget.hasBWI() &&
"Expected BWI target");
31975 if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.
isScalarInteger()) {
31976 assert(!Subtarget.
hasAVX512() &&
"Should use K-registers with AVX512");
31977 MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
31984 assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
31985 SrcVT == MVT::i64) &&
"Unexpected VT!");
31988 if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
31989 !(DstVT == MVT::x86mmx && SrcVT.
isVector()))
32002 assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
32003 "Unexpected source type in LowerBITCAST");
32007 MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
32010 if (DstVT == MVT::x86mmx)
32027 MVT ByteVecVT = V.getSimpleValueType();
32030 "Expected value to have byte element type.");
32031 assert(EltVT != MVT::i8 &&
32032 "Horizontal byte sum only makes sense for wider elements!");
32038 if (EltVT == MVT::i64) {
32045 if (EltVT == MVT::i32) {
32074 assert(EltVT == MVT::i16 &&
"Unknown how to handle type");
32090 MVT VT =
Op.getSimpleValueType();
32094 assert(EltVT == MVT::i8 &&
"Only vXi8 vector CTPOP lowering supported.");
32106 const int LUT[16] = { 0, 1, 1, 2,
32112 for (
int i = 0; i < NumElts; ++i)
32137 MVT VT =
Op.getSimpleValueType();
32139 "Unknown CTPOP type to handle");
32143 if (Subtarget.hasVPOPCNTDQ()) {
32180 MVT VT =
N.getSimpleValueType();
32195 unsigned ShiftedActiveBits = Known.
getBitWidth() - (LZ + TZ);
32198 if (ShiftedActiveBits <= 2) {
32199 if (ActiveBits > 2)
32210 if (ShiftedActiveBits <= 3) {
32211 if (ActiveBits > 3)
32225 if (ShiftedActiveBits <= 4 &&
32228 if (ActiveBits > 4)
32242 if (ShiftedActiveBits <= 8) {
32244 if (ActiveBits > 8)
32263 "We only do custom lowering for vector population count.");
32268 MVT VT =
Op.getSimpleValueType();
32290 "Only 128-bit vector bitreverse lowering supported.");
32297 for (
int i = 0; i != NumElts; ++i) {
32298 for (
int j = ScalarSizeInBytes - 1; j >= 0; --j) {
32299 int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
32300 int PermuteByte = SourceByte | (2 << 5);
32314 MVT VT =
Op.getSimpleValueType();
32319 assert(Subtarget.
hasSSSE3() &&
"SSSE3 required for BITREVERSE");
32335 (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
32336 "Only tested for i8/i16/i32/i64");
32358 "Only byte vector BITREVERSE supported");
32363 if (Subtarget.hasGFNI()) {
32376 const int LoLUT[16] = {
32377 0x00, 0x80, 0x40, 0xC0,
32378 0x20, 0xA0, 0x60, 0xE0,
32379 0x10, 0x90, 0x50, 0xD0,
32380 0x30, 0xB0, 0x70, 0xF0};
32381 const int HiLUT[16] = {
32382 0x00, 0x08, 0x04, 0x0C,
32383 0x02, 0x0A, 0x06, 0x0E,
32384 0x01, 0x09, 0x05, 0x0D,
32385 0x03, 0x0B, 0x07, 0x0F};
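// Editorial note (sketch): without GFNI, BITREVERSE of byte vectors splits each
// byte into its low and high nibble and looks both up with PSHUFB; LoLUT holds
// the bit-reversed low nibbles (already placed in the high half) and HiLUT the
// reversed high nibbles, so OR-ing the two lookups yields the reversed byte.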
32388 for (unsigned i = 0; i < NumElts; ++i) {
32404 MVT VT =
Op.getSimpleValueType();
32407 if (VT == MVT::i8 ||
32419 if (Subtarget.hasPOPCNT())
32422 if (VT == MVT::i64) {
32431 if (VT != MVT::i16) {
32458 unsigned NewOpc = 0;
32459 switch (
N->getOpcode()) {
32483 {N->getOperand(0), N->getOperand(1), N->getOperand(2)},
32484 N->getSimpleValueType(0), MMO);
32494 unsigned Opc =
N->getOpcode();
32495 MVT VT =
N->getSimpleValueType(0);
32501 if (
N->hasAnyUseOfValue(0)) {
32512 "Used AtomicRMW ops other than Add should have been expanded!");
32535 assert(!
N->hasAnyUseOfValue(0));
32542 assert(!
N->hasAnyUseOfValue(0));
32550 assert(!
N->hasAnyUseOfValue(0));
32558 auto *
Node = cast<AtomicSDNode>(
Op.getNode());
32560 EVT VT =
Node->getMemoryVT();
32568 if (!IsSeqCst && IsTypeLegal)
32571 if (!IsTypeLegal && !Subtarget.useSoftFloat() &&
32573 Attribute::NoImplicitFloat)) {
32577 if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.
hasAVX()) {
32580 Node->getMemOperand());
32585 if (VT == MVT::i64) {
32589 MVT StVT = Subtarget.
hasSSE2() ? MVT::v2i64 : MVT::v4f32;
32594 MVT::i64,
Node->getMemOperand());
32595 }
else if (Subtarget.hasX87()) {
32599 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
32605 SDValue LdOps[] = {Chain, StackPtr};
32609 Chain =
Value.getValue(1);
32615 StoreOps, MVT::i64,
Node->getMemOperand());
32633 Node->getOperand(0),
Node->getOperand(2),
32634 Node->getOperand(1),
Node->getMemOperand());
32640 MVT VT =
N->getSimpleValueType(0);
32641 unsigned Opc =
Op.getOpcode();
32658 Op.getOperand(0),
Op.getOperand(1),
32664 if (
N->getValueType(1) == MVT::i1)
32687 Entry.IsSExt =
false;
32688 Entry.IsZExt =
false;
32689 Args.push_back(Entry);
32691 bool isF64 = ArgVT == MVT::f64;
32696 RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
32709 std::pair<SDValue, SDValue> CallResult = TLI.
LowerCallTo(CLI);
32713 return CallResult.first;
32729 bool FillWithZeroes =
false) {
32739 "input and widen element type must match");
32743 assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
32744 "Unexpected request for vector widening");
32762 Ops.
append(WidenNumElts - InNumElts, FillVal);
32774 "MGATHER/MSCATTER are supported on AVX-512 arch only");
32778 MVT VT = Src.getSimpleValueType();
32786 SDValue BasePtr =
N->getBasePtr();
32788 if (VT == MVT::v2f32 || VT == MVT::v2i32) {
32789 assert(Mask.getValueType() == MVT::v2i1 &&
"Unexpected mask type");
32791 if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
32796 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
32798 N->getMemoryVT(),
N->getMemOperand());
32803 MVT IndexVT = Index.getSimpleValueType();
32807 if (IndexVT == MVT::v2i32)
32813 !Index.getSimpleValueType().is512BitVector()) {
32829 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
32831 N->getMemoryVT(),
N->getMemOperand());
32838 MVT VT =
Op.getSimpleValueType();
32841 MVT MaskVT = Mask.getSimpleValueType();
32842 SDValue PassThru =
N->getPassThru();
32852 VT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(), Mask,
32854 N->getMemOperand(),
N->getAddressingMode(),
N->getExtensionType(),
32855 N->isExpandingLoad());
32862 "Expanding masked load is supported on AVX-512 target only!");
32865 "Expanding masked load is supported for 32 and 64-bit types only!");
32868 "Cannot lower masked load op.");
32871 (Subtarget.hasBWI() &&
32872 (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
32873 "Unsupported masked load op.");
32882 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
32883 "Unexpected mask type");
32889 WideDataVT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(), Mask,
32890 PassThru,
N->getMemoryVT(),
N->getMemOperand(),
N->getAddressingMode(),
32891 N->getExtensionType(),
N->isExpandingLoad());
32903 SDValue DataToStore =
N->getValue();
32910 "Expanding masked load is supported on AVX-512 target only!");
32913 "Expanding masked load is supported for 32 and 64-bit types only!");
32916 "Cannot lower masked store op.");
32919 (Subtarget.hasBWI() &&
32920 (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
32921 "Unsupported masked store op.");
32929 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
32930 "Unexpected mask type");
32934 DataToStore =
ExtendToType(DataToStore, WideDataVT, DAG);
32936 return DAG.
getMaskedStore(
N->getChain(), dl, DataToStore,
N->getBasePtr(),
32937 N->getOffset(), Mask,
N->getMemoryVT(),
32938 N->getMemOperand(),
N->getAddressingMode(),
32939 N->isTruncatingStore(),
N->isCompressingStore());
32945 "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");
32949 MVT VT =
Op.getSimpleValueType();
32952 SDValue PassThru =
N->getPassThru();
32953 MVT IndexVT = Index.getSimpleValueType();
32958 if (IndexVT == MVT::v2i32)
32985 SDValue Ops[] = {
N->getChain(), PassThru, Mask,
N->getBasePtr(), Index,
32989 N->getMemOperand());
32998 MVT DstVT =
Op.getSimpleValueType();
33001 unsigned SrcAS =
N->getSrcAddressSpace();
33003 assert(SrcAS !=
N->getDestAddressSpace() &&
33004 "addrspacecast must be between different address spaces");
33008 }
else if (DstVT == MVT::i64) {
33010 }
else if (DstVT == MVT::i32) {
33028 if (
Op->getGluedNode())
33038 EVT VT =
Op.getValueType();
33051 unsigned IsData =
Op.getConstantOperandVal(4);
33055 if (!IsData && !Subtarget.hasPREFETCHI())
33056 return Op.getOperand(0);
33063 SDValue Operand =
N->getOperand(0);
33075 {Chain, Operand, One});
33081 const APInt Operand(32, OpNo);
33083 std::string Str(
" $");
33085 std::string OpNoStr1(Str + OpNoStr);
33086 std::string OpNoStr2(Str +
"{" + OpNoStr +
":");
33089 for (
auto &AsmStr : AsmStrs) {
33092 if (AsmStr.ends_with(OpNoStr1))
33093 I = AsmStr.size() - OpNoStr1.size();
33097 I = AsmStr.find(OpNoStr1 +
",");
33099 I = AsmStr.find(OpNoStr2);
33104 assert(
I > 0 &&
"Unexpected inline asm string!");
33112 auto TmpStr = AsmStr.substr(0,
I);
33113 I = TmpStr.rfind(
':');
33115 TmpStr = TmpStr.substr(
I + 1);
33116 return TmpStr.take_while(llvm::isAlpha);
33141 return SDValue(CmpZero.getNode(), 1);
33159 SDValue Ops[] = {Chain,
Ptr, ScalarPassThru, COND_NE, Flags};
33177 SDValue Ops[] = {Chain, ScalarVal,
Ptr, COND_NE, Flags};
33183 switch (
Op.getOpcode()) {
33273 return LowerFRAME_TO_ARGS_OFFSET(
Op, DAG);
33279 return lowerEH_SJLJ_SETUP_DISPATCH(
Op, DAG);
33350 unsigned Opc =
N->getOpcode();
33354 dbgs() <<
"ReplaceNodeResults: ";
33357 llvm_unreachable(
"Do not know how to custom type legalize this operation!");
33359 EVT VT =
N->getValueType(0);
33371 EVT VT =
N->getValueType(0);
33377 {
N->getOperand(0),
Lo});
33379 {
N->getOperand(0),
Hi});
33381 Lo.getValue(1),
Hi.getValue(1));
33391 assert(
N->getValueType(0) == MVT::i64 &&
"Unexpected VT!");
33397 if ((LZ + TZ) >= 32) {
33407 bool NoImplicitFloatOps =
33409 Attribute::NoImplicitFloat);
33410 if (
isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
33425 EVT VT =
N->getValueType(0);
33437 ConcatOps[0] = Res;
33444 EVT VT =
N->getValueType(0);
33446 VT == MVT::v2i32 &&
"Unexpected VT!");
33483 EVT VT =
N->getValueType(0);
33484 EVT InVT =
N->getOperand(0).getValueType();
33486 "Expected a VT that divides into 128 bits.");
33488 "Unexpected type action!");
33499 Ops[0] =
N->getOperand(0);
33501 Ops[0] =
N->getOperand(1);
33515 EVT VT =
N->getValueType(0);
33516 assert(VT == MVT::v2f32 &&
"Unexpected type (!= v2f32) on FMIN/FMAX.");
33520 N->getOperand(IsStrict ? 1 : 0), UNDEF);
33522 N->getOperand(IsStrict ? 2 : 1), UNDEF);
33525 Res = DAG.
getNode(Opc, dl, {MVT::v4f32, MVT::Other},
33526 {
N->getOperand(0),
LHS,
RHS});
33538 EVT VT =
N->getValueType(0);
33541 "Unexpected type action!");
33549 Ops0[0] =
N->getOperand(0);
33564 MVT VT =
N->getSimpleValueType(0);
33573 EVT InVT = In.getValueType();
33581 unsigned PackOpcode;
33585 dl, DAG, Subtarget)) {
33598 for (
unsigned I = 0;
I < MinElts; ++
I)
33599 TruncMask[
I] = Scale *
I;
33602 "Illegal vector type in truncation");
33615 if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
33620 if (InVT == MVT::v4i64 && VT == MVT::v4i8 &&
isTypeLegal(MVT::v8i64)) {
33627 if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 &&
33638 { 0, 1, 2, 3, 16, 17, 18, 19,
33639 -1, -1, -1, -1, -1, -1, -1, -1 });
33646 if ((InEltVT == MVT::i16 || InEltVT == MVT::i32 || InEltVT == MVT::i64) &&
33647 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32) &&
33650 !(MinElts <= 4 && InEltVT == MVT::i64 && EltVT == MVT::i8)))) {
33662 assert(
N->getValueType(0) == MVT::v8i8 &&
33663 "Do not know how to legalize this Node");
33667 EVT VT =
N->getValueType(0);
33669 EVT InVT = In.getValueType();
33670 if (!Subtarget.
hasSSE41() && VT == MVT::v4i64 &&
33671 (InVT == MVT::v4i16 || InVT == MVT::v4i8)){
33673 "Unexpected type action!");
33699 if (VT == MVT::v16i32 || VT == MVT::v8i64) {
33711 In = DAG.
getNode(Opc, dl, InVT, In);
33724 unsigned HalfNumElts = NumElts / 2;
33726 for (
unsigned i = 0; i != HalfNumElts; ++i)
33727 ShufMask[i] = i + HalfNumElts;
33739 if (!Subtarget.hasAVX10_2())
33743 EVT VT =
N->getValueType(0);
33745 EVT OpVT =
Op.getValueType();
33748 if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
33761 bool IsStrict =
N->isStrictFPOpcode();
33763 EVT VT =
N->getValueType(0);
33764 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
33766 EVT SrcVT = Src.getValueType();
33773 DAG.
getNode(Opc, dl, {VT, MVT::Other},
33775 {NVT, MVT::Other}, {Chain, Src})});
33788 if (VT.
isVector() && Subtarget.hasFP16() &&
33791 EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
33793 if (SrcVT != MVT::v8f16) {
33804 DAG.
getNode(Opc, dl, {ResVT, MVT::Other}, {
N->getOperand(0), Src});
33808 Res = DAG.
getNode(Opc, dl, ResVT, Src);
33820 ConcatOps[0] = Res;
33833 "Unexpected type action!");
33843 {
N->getOperand(0), Src});
33850 if (PromoteVT == MVT::v2i32)
33858 if (PromoteVT == MVT::v2i32)
33870 ConcatOps[0] = Res;
33879 if (VT == MVT::v2i32) {
33881 "Strict unsigned conversion requires AVX512");
33884 "Unexpected type action!");
33885 if (Src.getValueType() == MVT::v2f64) {
33886 if (!IsSigned && !Subtarget.
hasAVX512()) {
33899 if (!IsSigned && !Subtarget.hasVLX()) {
33909 Opc =
N->getOpcode();
33914 Res = DAG.
getNode(Opc, dl, {MVT::v4i32, MVT::Other},
33915 {
N->getOperand(0), Src});
33918 Res = DAG.
getNode(Opc, dl, MVT::v4i32, Src);
33928 if (Src.getValueType() == MVT::v2f32 && IsStrict) {
33932 {
N->getOperand(0), Src});
33943 assert(!VT.
isVector() &&
"Vectors should have been handled above!");
33945 if ((Subtarget.hasDQI() && VT == MVT::i64 &&
33946 (SrcVT == MVT::f32 || SrcVT == MVT::f64)) ||
33947 (Subtarget.hasFP16() && SrcVT == MVT::f16)) {
33948 assert(!Subtarget.is64Bit() &&
"i64 should be legal");
33949 unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
33955 if (NumElts != SrcElts) {
33989 if (
SDValue V = FP_TO_INTHelper(
SDValue(
N, 0), DAG, IsSigned, Chain)) {
33998 if (
SDValue V = LRINT_LLRINTHelper(
N, DAG))
34007 bool IsStrict =
N->isStrictFPOpcode();
34009 EVT VT =
N->getValueType(0);
34010 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34012 Subtarget.hasVLX()) {
34013 if (Src.getValueType().getVectorElementType() == MVT::i16)
34016 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
34024 {
N->getOperand(0), Src});
34033 if (VT != MVT::v2f32)
34035 EVT SrcVT = Src.getValueType();
34036 if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
34041 {
N->getOperand(0), Src});
34050 if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
34060 for (
int i = 0; i != 2; ++i) {
34066 {
N->getOperand(0), Elt});
34074 SignCvts[0].getValue(1), SignCvts[1].getValue(1));
34076 {Chain, SignCvt, SignCvt});
34091 if (SrcVT != MVT::v2i32)
34094 if (IsSigned || Subtarget.
hasAVX512()) {
34103 {
N->getOperand(0), Src});
34112 llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::v2f64);
34118 {
N->getOperand(0),
Or, VBias});
34120 {MVT::v4f32, MVT::Other},
34133 bool IsStrict =
N->isStrictFPOpcode();
34135 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34136 SDValue Rnd =
N->getOperand(IsStrict ? 2 : 1);
34137 EVT SrcVT = Src.getValueType();
34138 EVT VT =
N->getValueType(0);
34140 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
34146 assert(Subtarget.hasF16C() &&
"Cannot widen f16 without F16C");
34152 {Chain, Src, Rnd});
34158 Results.push_back(V.getValue(1));
34171 Results.push_back(V.getValue(1));
34178 assert(
N->getValueType(0) == MVT::v2f32 &&
34179 "Do not know how to legalize this Node");
34180 if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
34182 bool IsStrict =
N->isStrictFPOpcode();
34183 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34184 if (Src.getValueType().getVectorElementType() != MVT::f16)
34191 {
N->getOperand(0), V});
34196 Results.push_back(V.getValue(1));
34200 unsigned IntNo =
N->getConstantOperandVal(1);
34203 "legalize this intrinsic operation!");
34204 case Intrinsic::x86_rdtsc:
34207 case Intrinsic::x86_rdtscp:
34210 case Intrinsic::x86_rdpmc:
34214 case Intrinsic::x86_rdpru:
34218 case Intrinsic::x86_xgetbv:
34228 EVT T =
N->getValueType(0);
34229 assert((
T == MVT::i64 ||
T == MVT::i128) &&
"can only expand cmpxchg pair");
34230 bool Regs64bit =
T == MVT::i128;
34232 "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
34233 MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
34235 std::tie(cpInL, cpInH) =
34238 Regs64bit ? X86::RAX : X86::EAX, cpInL,
SDValue());
34243 std::tie(swapInL, swapInH) =
34273 Regs64bit ? X86::RAX : X86::EAX,
34274 HalfT, Result.getValue(1));
34276 Regs64bit ? X86::RDX : X86::EDX,
34292 (
N->getValueType(0) == MVT::i64 ||
N->getValueType(0) == MVT::i128) &&
34294 bool NoImplicitFloatOps =
34296 Attribute::NoImplicitFloat);
34297 if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
34298 auto *Node = cast<AtomicSDNode>(
N);
34300 if (
N->getValueType(0) == MVT::i128) {
34301 if (Subtarget.is64Bit() && Subtarget.
hasAVX()) {
34303 Node->getBasePtr(), Node->getMemOperand());
34318 MVT LdVT = Subtarget.
hasSSE2() ? MVT::v2i64 : MVT::v4f32;
34320 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
34322 MVT::i64, Node->getMemOperand());
34340 if (Subtarget.hasX87()) {
34344 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
34346 dl, Tys, Ops, MVT::i64,
34347 Node->getMemOperand());
34348 SDValue Chain = Result.getValue(1);
34355 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
34358 SDValue StoreOps[] = { Chain, Result, StackPtr };
34366 Result = DAG.
getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
34368 Results.push_back(Result.getValue(1));
34394 EVT DstVT =
N->getValueType(0);
34395 EVT SrcVT =
N->getOperand(0).getValueType();
34399 if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) {
34400 assert(!Subtarget.is64Bit() &&
"Expected 32-bit mode");
34410 if (DstVT.
isVector() && SrcVT == MVT::x86mmx) {
34414 "Unexpected type action!");
34426 EVT VT =
N->getValueType(0);
34427 if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
34428 (Subtarget.hasVLX() || !Subtarget.
hasAVX512())) {
34429 auto *Gather = cast<MaskedGatherSDNode>(
N);
34430 SDValue Index = Gather->getIndex();
34431 if (Index.getValueType() != MVT::v2i64)
34434 "Unexpected type action!");
34436 SDValue Mask = Gather->getMask();
34437 assert(Mask.getValueType() == MVT::v2i1 &&
"Unexpected mask type");
34439 Gather->getPassThru(),
34441 if (!Subtarget.hasVLX()) {
34448 SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
34449 Gather->getBasePtr(), Index, Gather->getScale() };
34452 Gather->getMemoryVT(), Gather->getMemOperand());
34463 MVT VT =
N->getSimpleValueType(0);
34466 "Unexpected type action!");
34469 auto *Ld = cast<LoadSDNode>(
N);
34471 MVT LdVT = Subtarget.is64Bit() && VT.
isInteger() ? MVT::i64 : MVT::f64;
34472 SDValue Res = DAG.
getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
34473 Ld->getPointerInfo(), Ld->getOriginalAlign(),
34486 SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
34488 MVT::i64, Ld->getMemOperand());
34499 assert(
N->getValueType(0) == MVT::i64 &&
"Unexpected VT!");
34500 assert((Subtarget.hasXOP() || Subtarget.hasGFNI()) &&
"Expected XOP/GFNI");
34508 assert(
N->getSimpleValueType(0) == MVT::f16 &&
34509 "Unexpected Value type of EXTRACT_VECTOR_ELT!");
34510 assert(Subtarget.hasFP16() &&
"Expected FP16");
34526#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
34990#undef NODE_NAME_CASE
35024 switch (AM.
Scale) {
35089 return NumBits1 > NumBits2;
35107 return isInt<32>(Imm);
35112 return isInt<32>(Imm);
35116 return isInt<32>(Imm);
35124 return NumBits1 > NumBits2;
35134 return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
35162 if (!Subtarget.is64Bit())
35168 if (isa<MaskedLoadSDNode>(ExtVal.
getOperand(0)))
35182 if (Subtarget.useSoftFloat())
35195 return Subtarget.hasFP16();
35207 EVT DestVT)
const {
35209 return !(SrcVT == MVT::i32 && DestVT == MVT::i16);
35252 if (VT == MVT::v32i8 || VT == MVT::v16i16)
35269 EVT ConditionVT)
const {
    if (mi.readsRegister(X86::EFLAGS, nullptr))
    if (mi.definesRegister(X86::EFLAGS, nullptr))
    if (Succ->isLiveIn(X86::EFLAGS))
  Register DstReg = MI.getOperand(0).getReg();
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register fallDstReg = MRI.createVirtualRegister(RC);
  BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF));
  BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg)
  BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
  MI.eraseFromParent();
  assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
  Register DestReg = MI.getOperand(0).getReg();
  unsigned ArgSize = MI.getOperand(6).getImm();
  unsigned ArgMode = MI.getOperand(7).getImm();
  assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");
  unsigned TotalNumIntRegs = 6;
  unsigned TotalNumXMMRegs = 8;
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 +
                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
  bool NeedsAlign = (Alignment > 8);
  unsigned OffsetDestReg = 0;
  unsigned OverflowDestReg = 0;
  unsigned OffsetReg = 0;
  if (!UseGPOffset && !UseFPOffset) {
    OverflowDestReg = DestReg;
    offsetMBB = nullptr;
    overflowMBB = thisMBB;
    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
    MF->insert(MBBIter, offsetMBB);
    MF->insert(MBBIter, overflowMBB);
    MF->insert(MBBIter, endMBB);
    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(thisMBB, MIMD, TII->get(X86::MOV32rm), OffsetReg)
        .addDisp(Disp, UseFPOffset ? 4 : 0)
    BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))
        .addImm(MaxOffset + 8 - ArgSizeA8);
    BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))
    Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
      Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
      BuildMI(offsetMBB, MIMD, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
          .addImm(X86::sub_32bit);
      BuildMI(offsetMBB, MIMD, TII->get(X86::ADD64rr), OffsetDestReg)
      BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32rr), OffsetDestReg)
    Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)
        .addImm(UseFPOffset ? 16 : 8);
    BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))
        .addDisp(Disp, UseFPOffset ? 4 : 0)
    BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))
  Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
    Register TmpReg = MRI.createVirtualRegister(AddrRegClass);
        .addReg(OverflowAddrReg)
    BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)
        .addReg(OverflowAddrReg);
  Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
      .addReg(OverflowDestReg)
          TII->get(X86::PHI), DestReg)
  MI.eraseFromParent();
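// In the code above, MaxOffset = 6 * 8 + 8 * 16 = 176 bytes when FP arguments
// are in play, i.e. the full SysV register-save area (6 GP registers of 8
// bytes plus 8 XMM registers of 16 bytes); the CMP32ri against
// MaxOffset + 8 - ArgSizeA8 decides whether the argument still fits in the
// save area or must be taken from the overflow area instead.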
  SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
  switch (MI.getOpcode()) {
  case X86::CMOV_FR16:
  case X86::CMOV_FR16X:
  case X86::CMOV_FR32:
  case X86::CMOV_FR32X:
  case X86::CMOV_FR64:
  case X86::CMOV_FR64X:
  case X86::CMOV_GR8:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
  case X86::CMOV_VR64:
  case X86::CMOV_VR128:
  case X86::CMOV_VR128X:
  case X86::CMOV_VR256:
  case X86::CMOV_VR256X:
  case X86::CMOV_VR512:
  case X86::CMOV_VK1:
  case X86::CMOV_VK2:
  case X86::CMOV_VK4:
  case X86::CMOV_VK8:
  case X86::CMOV_VK16:
  case X86::CMOV_VK32:
  case X86::CMOV_VK64:
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();
    if (MIIt->getOperand(3).getImm() == OppCC)
    if (auto It = RegRewriteTable.find(Op1Reg); It != RegRewriteTable.end())
      Op1Reg = It->second.first;
    if (auto It = RegRewriteTable.find(Op2Reg); It != RegRewriteTable.end())
      Op2Reg = It->second.second;
    BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg)
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
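// When several adjacent CMOV pseudos are lowered as one diamond,
// RegRewriteTable remembers, for each CMOV result, which incoming value to
// take from the true/false block, so later PHIs in the group forward the
// already-selected operands instead of the not-yet-defined CMOV destinations.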
35781X86TargetLowering::EmitLoweredCascadedSelect(
MachineInstr &FirstCMOV,
35865 F->insert(It, FirstInsertedMBB);
35866 F->insert(It, SecondInsertedMBB);
35867 F->insert(It, SinkMBB);
35872 FirstInsertedMBB->
addLiveIn(X86::EFLAGS);
35877 if (!SecondCascadedCMOV.
killsRegister(X86::EFLAGS,
nullptr) &&
35879 SecondInsertedMBB->
addLiveIn(X86::EFLAGS);
35906 BuildMI(FirstInsertedMBB, MIMD,
TII->get(X86::JCC_1))
35916 BuildMI(*SinkMBB, SinkMBB->
begin(), MIMD,
TII->get(X86::PHI), DestReg)
35918 .
addMBB(SecondInsertedMBB)
35999 (NextMIIt->getOperand(3).getImm() ==
CC ||
36000 NextMIIt->getOperand(3).getImm() == OppCC)) {
36001 LastCMOV = &*NextMIIt;
36008 if (LastCMOV == &
MI && NextMIIt != ThisMBB->
end() &&
36009 NextMIIt->getOpcode() ==
MI.getOpcode() &&
36010 NextMIIt->getOperand(2).getReg() ==
MI.getOperand(2).getReg() &&
36011 NextMIIt->getOperand(1).getReg() ==
MI.getOperand(0).getReg() &&
36012 NextMIIt->getOperand(1).isKill()) {
36013 return EmitLoweredCascadedSelect(
MI, *NextMIIt, ThisMBB);
36022 F->insert(It, FalseMBB);
36023 F->insert(It, SinkMBB);
36026 unsigned CallFrameSize =
TII->getCallFrameSizeAt(
MI);
36043 if (
MI.isDebugInstr())
36047 SinkMBB->
splice(SinkMBB->
end(), ThisMBB,
36071 ThisMBB->
erase(MIItBegin, MIItEnd);
36078 return X86::SUB64ri32;
36080 return X86::SUB32ri;
36100 MF->
insert(MBBIter, testMBB);
36101 MF->
insert(MBBIter, blockMBB);
36102 MF->
insert(MBBIter, tailMBB);
36104 Register sizeVReg =
MI.getOperand(1).getReg();
36108 Register TmpStackPtr =
MRI.createVirtualRegister(
36110 Register FinalStackPtr =
MRI.createVirtualRegister(
36129 BuildMI(testMBB, MIMD,
TII->get(X86::JCC_1))
36147 const unsigned XORMIOpc =
36161 BuildMI(tailMBB, MIMD,
TII->get(TargetOpcode::COPY),
36162 MI.getOperand(0).getReg())
36171 MI.eraseFromParent();
36187 const bool Is64Bit = Subtarget.is64Bit();
36190 const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
36191 const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
36217 Register mallocPtrVReg =
MRI.createVirtualRegister(AddrRegClass),
36218 bumpSPPtrVReg =
MRI.createVirtualRegister(AddrRegClass),
36219 tmpSPVReg =
MRI.createVirtualRegister(AddrRegClass),
36220 SPLimitVReg =
MRI.createVirtualRegister(AddrRegClass),
36221 sizeVReg =
MI.getOperand(1).getReg(),
36227 MF->
insert(MBBIter, bumpMBB);
36228 MF->
insert(MBBIter, mallocMBB);
36229 MF->
insert(MBBIter, continueMBB);
36237 BuildMI(BB, MIMD,
TII->get(TargetOpcode::COPY), tmpSPVReg).
addReg(physSPReg);
36238 BuildMI(BB, MIMD,
TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
36240 BuildMI(BB, MIMD,
TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
36247 BuildMI(bumpMBB, MIMD,
TII->get(TargetOpcode::COPY), physSPReg)
36249 BuildMI(bumpMBB, MIMD,
TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
36257 BuildMI(mallocMBB, MIMD,
TII->get(X86::MOV64rr), X86::RDI)
36259 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALL64pcrel32))
36264 }
else if (Is64Bit) {
36265 BuildMI(mallocMBB, MIMD,
TII->get(X86::MOV32rr), X86::EDI)
36267 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALL64pcrel32))
36273 BuildMI(mallocMBB, MIMD,
TII->get(X86::SUB32ri), physSPReg).
addReg(physSPReg)
36276 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALLpcrel32))
36283 BuildMI(mallocMBB, MIMD,
TII->get(X86::ADD32ri), physSPReg).
addReg(physSPReg)
36286 BuildMI(mallocMBB, MIMD,
TII->get(TargetOpcode::COPY), mallocPtrVReg)
36287 .
addReg(IsLP64 ? X86::RAX : X86::EAX);
36297 BuildMI(*continueMBB, continueMBB->
begin(), MIMD,
TII->get(X86::PHI),
36298 MI.getOperand(0).getReg())
36305 MI.eraseFromParent();
36308 return continueMBB;
36321 "SEH does not use catchret!");
36324 if (!Subtarget.is32Bit())
36335 MI.getOperand(0).setMBB(RestoreMBB);
36341 auto RestoreMBBI = RestoreMBB->
begin();
36342 BuildMI(*RestoreMBB, RestoreMBBI, MIMD,
TII.get(X86::JMP_4)).
addMBB(TargetMBB);
36358 assert(
MI.getOperand(3).isGlobal() &&
"This should be a global");
36364 Subtarget.is64Bit() ?
36367 if (Subtarget.is64Bit()) {
36369 BuildMI(*BB,
MI, MIMD,
TII->get(X86::MOV64rm), X86::RDI)
36374 MI.getOperand(3).getTargetFlags())
36381 BuildMI(*BB,
MI, MIMD,
TII->get(X86::MOV32rm), X86::EAX)
36386 MI.getOperand(3).getTargetFlags())
36393 BuildMI(*BB,
MI, MIMD,
TII->get(X86::MOV32rm), X86::EAX)
36398 MI.getOperand(3).getTargetFlags())
36405 MI.eraseFromParent();
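// EmitLoweredTLSCall: the loads above appear to materialize the thread-local
// symbol's descriptor address (into RDI in 64-bit mode, EAX in 32-bit mode)
// with the TLS target flags before the call that yields the variable's
// address; the call itself is elided from this excerpt.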
  case X86::INDIRECT_THUNK_CALL32:
    return X86::CALLpcrel32;
  case X86::INDIRECT_THUNK_CALL64:
    return X86::CALL64pcrel32;
  case X86::INDIRECT_THUNK_TCRETURN32:
    return X86::TCRETURNdi;
  case X86::INDIRECT_THUNK_TCRETURN64:
    return X86::TCRETURNdi64;
  if (Subtarget.useRetpolineExternalThunk()) {
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_eax";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_ecx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edi";
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
  if (Subtarget.useRetpolineIndirectCalls() ||
      Subtarget.useRetpolineIndirectBranches()) {
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_eax";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_ecx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edi";
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__llvm_retpoline_r11";
  if (Subtarget.useLVIControlFlowIntegrity()) {
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_lvi_thunk_r11";
  llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
  Register CalleeVReg = MI.getOperand(0).getReg();
  if (Subtarget.is64Bit())
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
  for (const auto &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      llvm::replace(AvailableRegs, static_cast<unsigned>(MO.getReg()), 0U);
  unsigned AvailableReg = 0;
  for (unsigned MaybeReg : AvailableRegs) {
      AvailableReg = MaybeReg;
         "available registers");
  BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg)
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
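// EmitLoweredIndirectThunk picks the first candidate register not already
// used by the call, copies the callee pointer into it, and then rewrites the
// pseudo into a direct call or tail call to the matching thunk symbol
// (ChangeToES + setDesc), so the indirect branch itself only ever happens
// inside the thunk.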
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
  Register ZReg = MRI.createVirtualRegister(PtrRC);
  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
  Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
  unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
  const unsigned MemOpndSlot = 1;
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
      MIB.add(MI.getOperand(MemOpndSlot + i));
  unsigned MemOpndSlot = 0;
  unsigned CurOp = 0;
  DstReg = MI.getOperand(CurOp++).getReg();
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);
  MemOpndSlot = CurOp;
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
  unsigned PtrStoreOpc = 0;
  unsigned LabelReg = 0;
  if (!UseImmLabel) {
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
    LabelReg = MRI.createVirtualRegister(PtrRC);
    if (Subtarget.is64Bit()) {
      MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
      MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
  MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
      MIB.add(MI.getOperand(MemOpndSlot + i));
    emitSetJmpShadowStackFix(MI, thisMBB);
  MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
  BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);
  BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
  if (RegInfo->hasBasePointer(*MF)) {
    const bool Uses64BitFramePtr =
    unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
  BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
  MI.eraseFromParent();
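// emitEHSjLjSetJmp in outline: the setup block stores the resume label into
// the buffer (via LEA into a register, or as an immediate when UseImmLabel),
// and, when shadow stacks are enabled, emitSetJmpShadowStackFix additionally
// saves the current SSP read with RDSSPD/RDSSPQ. The main path then yields 0
// (MOV32r0) and the restore path yields 1 (MOV32ri 1), merged by the PHI in
// the sink block, matching setjmp's first-return/second-return semantics.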
36803 MF->
insert(
I, fixShadowLoopPrepareMBB);
36804 MF->
insert(
I, fixShadowLoopMBB);
36815 Register ZReg =
MRI.createVirtualRegister(&X86::GR32RegClass);
36816 BuildMI(checkSspMBB, MIMD,
TII->get(X86::MOV32r0), ZReg);
36818 if (PVT == MVT::i64) {
36819 Register TmpZReg =
MRI.createVirtualRegister(PtrRC);
36820 BuildMI(checkSspMBB, MIMD,
TII->get(X86::SUBREG_TO_REG), TmpZReg)
36823 .
addImm(X86::sub_32bit);
36828 Register SSPCopyReg =
MRI.createVirtualRegister(PtrRC);
36829 unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
36834 unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
36835 BuildMI(checkSspMBB, MIMD,
TII->get(TestRROpc))
36838 BuildMI(checkSspMBB, MIMD,
TII->get(X86::JCC_1))
36845 Register PrevSSPReg =
MRI.createVirtualRegister(PtrRC);
36846 unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
36849 BuildMI(fallMBB, MIMD,
TII->get(PtrLoadOpc), PrevSSPReg);
36854 else if (MO.
isReg())
36863 Register SspSubReg =
MRI.createVirtualRegister(PtrRC);
36864 unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
36865 BuildMI(fallMBB, MIMD,
TII->get(SubRROpc), SspSubReg)
36870 BuildMI(fallMBB, MIMD,
TII->get(X86::JCC_1))
36877 unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
36878 unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
36879 Register SspFirstShrReg =
MRI.createVirtualRegister(PtrRC);
36880 BuildMI(fixShadowMBB, MIMD,
TII->get(ShrRIOpc), SspFirstShrReg)
36885 unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
36889 Register SspSecondShrReg =
MRI.createVirtualRegister(PtrRC);
36890 BuildMI(fixShadowMBB, MIMD,
TII->get(ShrRIOpc), SspSecondShrReg)
36895 BuildMI(fixShadowMBB, MIMD,
TII->get(X86::JCC_1))
36902 unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64ri : X86::SHL32ri;
36903 Register SspAfterShlReg =
MRI.createVirtualRegister(PtrRC);
36904 BuildMI(fixShadowLoopPrepareMBB, MIMD,
TII->get(ShlR1Opc), SspAfterShlReg)
36905 .
addReg(SspSecondShrReg)
36909 Register Value128InReg =
MRI.createVirtualRegister(PtrRC);
36910 unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
36911 BuildMI(fixShadowLoopPrepareMBB, MIMD,
TII->get(MovRIOpc), Value128InReg)
36913 fixShadowLoopPrepareMBB->
addSuccessor(fixShadowLoopMBB);
36917 Register DecReg =
MRI.createVirtualRegister(PtrRC);
36918 Register CounterReg =
MRI.createVirtualRegister(PtrRC);
36919 BuildMI(fixShadowLoopMBB, MIMD,
TII->get(X86::PHI), CounterReg)
36921 .
addMBB(fixShadowLoopPrepareMBB)
36923 .
addMBB(fixShadowLoopMBB);
36926 BuildMI(fixShadowLoopMBB, MIMD,
TII->get(IncsspOpc)).
addReg(Value128InReg);
36929 unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
36930 BuildMI(fixShadowLoopMBB, MIMD,
TII->get(DecROpc), DecReg).
addReg(CounterReg);
36933 BuildMI(fixShadowLoopMBB, MIMD,
TII->get(X86::JCC_1))
36934 .
addMBB(fixShadowLoopMBB)
36954 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
36955 "Invalid Pointer Size!");
36958 (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
36962 Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
36970 unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
36971 unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
36977 thisMBB = emitLongJmpShadowStackFix(
MI, thisMBB);
36994 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(PtrLoadOpc), Tmp);
36998 MIB.
addDisp(MO, LabelOffset);
36999 else if (MO.
isReg())
37008 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(PtrLoadOpc), SP);
37011 MIB.
addDisp(
MI.getOperand(i), SPOffset);
37013 MIB.
add(
MI.getOperand(i));
37022 MI.eraseFromParent();
37036 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
37045 Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
37048 (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
37049 VR =
MRI->createVirtualRegister(TRC);
37050 Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
37052 if (Subtarget.is64Bit())
37088 unsigned MaxCSNum = 0;
37089 for (
auto &
MBB : *MF) {
37094 for (
const auto &
MI :
MBB) {
37095 if (
MI.isDebugInstr())
37098 assert(
MI.isEHLabel() &&
"expected EH_LABEL");
37099 Sym =
MI.getOperand(0).getMCSymbol();
37103 if (!MF->hasCallSiteLandingPad(
Sym))
37106 for (
unsigned CSI : MF->getCallSiteLandingPad(
Sym)) {
37107 CallSiteNumToLPad[CSI].push_back(&
MBB);
37108 MaxCSNum = std::max(MaxCSNum, CSI);
37113 std::vector<MachineBasicBlock *> LPadList;
37117 for (
unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
37118 for (
auto &LP : CallSiteNumToLPad[CSI]) {
37119 LPadList.push_back(LP);
37120 InvokeBBs.
insert(LP->pred_begin(), LP->pred_end());
37124 assert(!LPadList.empty() &&
37125 "No landing pad destinations for the dispatch jump table!");
37141 MF->push_back(DispatchBB);
37142 MF->push_back(DispContBB);
37143 MF->push_back(TrapBB);
37147 SetupEntryBlockForSjLj(
MI, BB, DispatchBB, FI);
37158 const bool FPIs64Bit =
37165 unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
37170 BuildMI(DispatchBB, MIMD,
TII->get(X86::NOOP))
37175 Register IReg =
MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
37177 Subtarget.is64Bit() ? 8 : 4);
37178 BuildMI(DispatchBB, MIMD,
TII->get(X86::CMP32ri))
37180 .
addImm(LPadList.size());
37181 BuildMI(DispatchBB, MIMD,
TII->get(X86::JCC_1))
37185 if (Subtarget.is64Bit()) {
37186 Register BReg =
MRI->createVirtualRegister(&X86::GR64RegClass);
37187 Register IReg64 =
MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
37190 BuildMI(DispContBB, MIMD,
TII->get(X86::LEA64r), BReg)
37197 BuildMI(DispContBB, MIMD,
TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
37200 .
addImm(X86::sub_32bit);
37205 BuildMI(DispContBB, MIMD,
TII->get(X86::JMP64m))
37213 Register OReg =
MRI->createVirtualRegister(&X86::GR32RegClass);
37214 Register OReg64 =
MRI->createVirtualRegister(&X86::GR64RegClass);
37215 Register TReg =
MRI->createVirtualRegister(&X86::GR64RegClass);
37218 BuildMI(DispContBB, MIMD,
TII->get(X86::MOV32rm), OReg)
37225 BuildMI(DispContBB, MIMD,
TII->get(X86::MOVSX64rr32), OReg64)
37228 BuildMI(DispContBB, MIMD,
TII->get(X86::ADD64rr), TReg)
37240 BuildMI(DispContBB, MIMD,
TII->get(X86::JMP32m))
37250 for (
auto &LP : LPadList)
37251 if (SeenMBBs.
insert(LP).second)
37256 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
37264 for (
auto *MBBS : Successors) {
37265 if (MBBS->isEHPad()) {
37282 for (
auto &MOp :
II.operands())
37284 DefRegs[MOp.getReg()] =
true;
37287 for (
unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) {
37288 unsigned Reg = SavedRegs[RegIdx];
37299 for (
auto &LP : MBBLPads)
37300 LP->setIsEHPad(
false);
37303 MI.eraseFromParent();
37318 unsigned AdjStackDown =
TII.getCallFrameSetupOpcode();
37324 unsigned AdjStackUp =
TII.getCallFrameDestroyOpcode();
37339 auto TMMImmToTMMReg = [](
unsigned Imm) {
37340 assert (Imm < 8 &&
"Illegal tmm index");
37341 return X86::TMM0 + Imm;
37343 auto TMMImmToTMMPair = [](
unsigned Imm) {
37344 assert(Imm < 8 &&
"Illegal tmm pair index.");
37345 return X86::TMM0_TMM1 + Imm / 2;
37347 switch (
MI.getOpcode()) {
37350 case X86::INDIRECT_THUNK_CALL32:
37351 case X86::INDIRECT_THUNK_CALL64:
37352 case X86::INDIRECT_THUNK_TCRETURN32:
37353 case X86::INDIRECT_THUNK_TCRETURN64:
37354 return EmitLoweredIndirectThunk(
MI, BB);
37355 case X86::CATCHRET:
37356 return EmitLoweredCatchRet(
MI, BB);
37357 case X86::SEG_ALLOCA_32:
37358 case X86::SEG_ALLOCA_64:
37359 return EmitLoweredSegAlloca(
MI, BB);
37360 case X86::PROBED_ALLOCA_32:
37361 case X86::PROBED_ALLOCA_64:
37362 return EmitLoweredProbedAlloca(
MI, BB);
37363 case X86::TLSCall_32:
37364 case X86::TLSCall_64:
37365 return EmitLoweredTLSCall(
MI, BB);
37366 case X86::CMOV_FR16:
37367 case X86::CMOV_FR16X:
37368 case X86::CMOV_FR32:
37369 case X86::CMOV_FR32X:
37370 case X86::CMOV_FR64:
37371 case X86::CMOV_FR64X:
37372 case X86::CMOV_GR8:
37373 case X86::CMOV_GR16:
37374 case X86::CMOV_GR32:
37375 case X86::CMOV_RFP32:
37376 case X86::CMOV_RFP64:
37377 case X86::CMOV_RFP80:
37378 case X86::CMOV_VR64:
37379 case X86::CMOV_VR128:
37380 case X86::CMOV_VR128X:
37381 case X86::CMOV_VR256:
37382 case X86::CMOV_VR256X:
37383 case X86::CMOV_VR512:
37384 case X86::CMOV_VK1:
37385 case X86::CMOV_VK2:
37386 case X86::CMOV_VK4:
37387 case X86::CMOV_VK8:
37388 case X86::CMOV_VK16:
37389 case X86::CMOV_VK32:
37390 case X86::CMOV_VK64:
37391 return EmitLoweredSelect(
MI, BB);
37393 case X86::FP80_ADDr:
37394 case X86::FP80_ADDm32: {
37397 int OrigCWFrameIdx =
37417 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), NewCW16)
37421 int NewCWFrameIdx =
37432 if (
MI.getOpcode() == X86::FP80_ADDr) {
37434 .
add(
MI.getOperand(0))
37435 .
add(
MI.getOperand(1))
37436 .
add(
MI.getOperand(2));
37439 .
add(
MI.getOperand(0))
37440 .
add(
MI.getOperand(1))
37441 .
add(
MI.getOperand(2))
37442 .
add(
MI.getOperand(3))
37443 .
add(
MI.getOperand(4))
37444 .
add(
MI.getOperand(5))
37445 .
add(
MI.getOperand(6));
37452 MI.eraseFromParent();
  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    int OrigCWFrameIdx =
    BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
    int NewCWFrameIdx =
                      TII->get(X86::FLDCW16m)), NewCWFrameIdx);
    switch (MI.getOpcode()) {
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    MI.eraseFromParent();
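// The FP*_TO_INT*_IN_MEM pseudos exist because x87 FIST obeys the current
// rounding mode while C's float-to-int conversion must truncate: the block
// spills the FP control word, loads a modified copy with FLDCW16m, stores the
// integer with the IST_Fp<dst>m<src> opcode chosen above, and the elided code
// around this excerpt restores the original control word afterwards.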
37532 case X86::VAARG_64:
37533 case X86::VAARG_X32:
37534 return EmitVAARGWithCustomInserter(
MI, BB);
37536 case X86::EH_SjLj_SetJmp32:
37537 case X86::EH_SjLj_SetJmp64:
37538 return emitEHSjLjSetJmp(
MI, BB);
37540 case X86::EH_SjLj_LongJmp32:
37541 case X86::EH_SjLj_LongJmp64:
37542 return emitEHSjLjLongJmp(
MI, BB);
37544 case X86::Int_eh_sjlj_setup_dispatch:
37545 return EmitSjLjDispatchBlock(
MI, BB);
37547 case TargetOpcode::STATEPOINT:
37552 case TargetOpcode::STACKMAP:
37553 case TargetOpcode::PATCHPOINT:
37556 case TargetOpcode::PATCHABLE_EVENT_CALL:
37557 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
37558 return emitPatchableEventCall(
MI, BB);
37560 case X86::LCMPXCHG8B: {
37573 if (!Subtarget.is32Bit() || !
TRI->hasBasePointer(*MF))
37580 assert(
TRI->getBaseRegister() == X86::ESI &&
37581 "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
37582 "base pointer in mind");
37587 Register computedAddrVReg =
MRI.createVirtualRegister(AddrRegClass);
37592 if (AM.
IndexReg == X86::NoRegister)
37599 while (RMBBI != BB->
rend() &&
37600 (RMBBI->definesRegister(X86::EAX,
nullptr) ||
37601 RMBBI->definesRegister(X86::EBX,
nullptr) ||
37602 RMBBI->definesRegister(X86::ECX,
nullptr) ||
37603 RMBBI->definesRegister(X86::EDX,
nullptr))) {
37608 BuildMI(*BB, *
MBBI, MIMD,
TII->get(X86::LEA32r), computedAddrVReg), AM);
37614 case X86::LCMPXCHG16B_NO_RBX: {
37617 if (
TRI->hasBasePointer(*MF) &&
37618 (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
37624 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), SaveRBX)
37628 BuildMI(*BB,
MI, MIMD,
TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
37635 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::RBX)
37642 MI.eraseFromParent();
  case X86::MWAITX: {
    bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
    if (!IsRBX || !TRI->hasBasePointer(*MF)) {
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EBX)
          .addReg(MI.getOperand(2).getReg());
      MI.eraseFromParent();
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
      BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)
      BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITX_SAVE_RBX))
          .addReg(MI.getOperand(2).getReg())
      MI.eraseFromParent();
  case TargetOpcode::PREALLOCATED_SETUP: {
    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
    int64_t PreallocatedId = MI.getOperand(0).getImm();
    assert(StackAdjustment != 0 && "0 stack adjustment");
                      << StackAdjustment << "\n");
    BuildMI(*BB, MI, MIMD, TII->get(X86::SUB32ri), X86::ESP)
        .addImm(StackAdjustment);
    MI.eraseFromParent();
  case TargetOpcode::PREALLOCATED_ARG: {
    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
    int64_t PreallocatedId = MI.getOperand(1).getImm();
    int64_t ArgIdx = MI.getOperand(2).getImm();
                      << ", arg offset " << ArgOffset << "\n");
                 MI.getOperand(0).getReg()),
             X86::ESP, false, ArgOffset);
    MI.eraseFromParent();
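// PREALLOCATED_SETUP simply bumps ESP down by the argument-block size
// recorded for this preallocated call id, and PREALLOCATED_ARG hands back the
// address of slot ArgIdx as ESP plus the recorded per-argument offset; both
// are 32-bit-only, as the asserts above state.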
  case X86::PTDPBSSD:
  case X86::PTDPBSUD:
  case X86::PTDPBUSD:
  case X86::PTDPBUUD:
  case X86::PTDPBF16PS:
  case X86::PTDPFP16PS:
  case X86::PTCMMIMFP16PS:
  case X86::PTCMMRLFP16PS:
  case X86::PTDPBF8PS:
  case X86::PTDPBHF8PS:
  case X86::PTDPHBF8PS:
  case X86::PTDPHF8PS:
  case X86::PTTDPBF16PS:
  case X86::PTTDPFP16PS:
  case X86::PTTCMMIMFP16PS:
  case X86::PTTCMMRLFP16PS:
  case X86::PTCONJTCMMIMFP16PS:
  case X86::PTMMULTF32PS:
  case X86::PTTMMULTF32PS: {
    switch (MI.getOpcode()) {
    case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
    case X86::PTCMMIMFP16PS:
      Opc = X86::TCMMIMFP16PS;
    case X86::PTCMMRLFP16PS:
      Opc = X86::TCMMRLFP16PS;
    case X86::PTDPBF8PS: Opc = X86::TDPBF8PS; break;
    case X86::PTDPBHF8PS: Opc = X86::TDPBHF8PS; break;
    case X86::PTDPHBF8PS: Opc = X86::TDPHBF8PS; break;
    case X86::PTDPHF8PS: Opc = X86::TDPHF8PS; break;
    case X86::PTTDPBF16PS:
      Opc = X86::TTDPBF16PS;
    case X86::PTTDPFP16PS:
      Opc = X86::TTDPFP16PS;
    case X86::PTTCMMIMFP16PS:
      Opc = X86::TTCMMIMFP16PS;
    case X86::PTTCMMRLFP16PS:
      Opc = X86::TTCMMRLFP16PS;
    case X86::PTCONJTCMMIMFP16PS:
      Opc = X86::TCONJTCMMIMFP16PS;
    case X86::PTMMULTF32PS:
      Opc = X86::TMMULTF32PS;
    case X86::PTTMMULTF32PS:
      Opc = X86::TTMMULTF32PS;
    MI.eraseFromParent();
  case X86::PTILEZERO: {
    unsigned Imm = MI.getOperand(0).getImm();
    BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
    MI.eraseFromParent();
  case X86::PTILEZEROV: {
  case X86::PTILELOADDRS:
  case X86::PTILELOADDRST1:
  case X86::PTILELOADD:
  case X86::PTILELOADDT1:
  case X86::PTILESTORED: {
    switch (MI.getOpcode()) {
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
    case X86::PTILELOADD:
    case X86::PTILELOADDT1:
    case X86::PTILESTORED:
    case X86::PTILELOADDRS:
    case X86::PTILELOADDRST1:
#undef GET_EGPR_IF_ENABLED
    unsigned CurOp = 0;
    if (Opc != X86::TILESTORED && Opc != X86::TILESTORED_EVEX)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
    MIB.add(MI.getOperand(CurOp++));
    MIB.add(MI.getOperand(CurOp++));
    MIB.add(MI.getOperand(CurOp++));
    MIB.add(MI.getOperand(CurOp++));
    MIB.add(MI.getOperand(CurOp++));
    if (Opc == X86::TILESTORED || Opc == X86::TILESTORED_EVEX)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
    MI.eraseFromParent();
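// AMX pseudo expansion: the PT* pseudos carry the tile number as an
// immediate, and TMMImmToTMMReg turns imm N into the physical register
// TMM0 + N when the real TDP*/TILELOADD/TILESTORED encoding is formed. Loads
// name the tile as a def first, stores name it last, and the five MIB.add()
// operands in between are the usual x86 memory operand
// (base, scale, index, displacement, segment).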
37844 case X86::PT2RPNTLVWZ0:
37845 case X86::PT2RPNTLVWZ0T1:
37846 case X86::PT2RPNTLVWZ1:
37847 case X86::PT2RPNTLVWZ1T1:
37848 case X86::PT2RPNTLVWZ0RS:
37849 case X86::PT2RPNTLVWZ0RST1:
37850 case X86::PT2RPNTLVWZ1RS:
37851 case X86::PT2RPNTLVWZ1RST1: {
37854#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
37855 switch (
MI.getOpcode()) {
37858 case X86::PT2RPNTLVWZ0:
37861 case X86::PT2RPNTLVWZ0T1:
37864 case X86::PT2RPNTLVWZ1:
37867 case X86::PT2RPNTLVWZ1T1:
37870 case X86::PT2RPNTLVWZ0RS:
37873 case X86::PT2RPNTLVWZ0RST1:
37876 case X86::PT2RPNTLVWZ1RS:
37879 case X86::PT2RPNTLVWZ1RST1:
37883#undef GET_EGPR_IF_ENABLED
37887 MIB.
add(
MI.getOperand(1));
37888 MIB.
add(
MI.getOperand(2));
37889 MIB.
add(
MI.getOperand(3));
37890 MIB.
add(
MI.getOperand(4));
37891 MIB.
add(
MI.getOperand(5));
37892 MI.eraseFromParent();
37895 case X86::PTTRANSPOSED:
37896 case X86::PTCONJTFP16: {
37898 unsigned Opc =
MI.getOpcode() == X86::PTTRANSPOSED ? X86::TTRANSPOSED
37905 MI.eraseFromParent();
37908 case X86::PTCVTROWPS2BF16Hrri:
37909 case X86::PTCVTROWPS2BF16Lrri:
37910 case X86::PTCVTROWPS2PHHrri:
37911 case X86::PTCVTROWPS2PHLrri:
37912 case X86::PTCVTROWD2PSrri:
37913 case X86::PTILEMOVROWrri: {
37916 switch (
MI.getOpcode()) {
37919 case X86::PTCVTROWD2PSrri:
37920 Opc = X86::TCVTROWD2PSrri;
37922 case X86::PTCVTROWPS2BF16Hrri:
37923 Opc = X86::TCVTROWPS2BF16Hrri;
37925 case X86::PTCVTROWPS2PHHrri:
37926 Opc = X86::TCVTROWPS2PHHrri;
37928 case X86::PTCVTROWPS2BF16Lrri:
37929 Opc = X86::TCVTROWPS2BF16Lrri;
37931 case X86::PTCVTROWPS2PHLrri:
37932 Opc = X86::TCVTROWPS2PHLrri;
37934 case X86::PTILEMOVROWrri:
37935 Opc = X86::TILEMOVROWrri;
37939 MIB.
add(
MI.getOperand(0));
37941 MIB.
addImm(
MI.getOperand(2).getImm());
37943 MI.eraseFromParent();
37946 case X86::PTCVTROWPS2BF16Hrre:
37947 case X86::PTCVTROWPS2BF16Lrre:
37948 case X86::PTCVTROWPS2PHHrre:
37949 case X86::PTCVTROWPS2PHLrre:
37950 case X86::PTCVTROWD2PSrre:
37951 case X86::PTILEMOVROWrre: {
37954 switch (
MI.getOpcode()) {
37957 case X86::PTCVTROWD2PSrre:
37958 Opc = X86::TCVTROWD2PSrre;
37960 case X86::PTCVTROWPS2BF16Hrre:
37961 Opc = X86::TCVTROWPS2BF16Hrre;
37963 case X86::PTCVTROWPS2BF16Lrre:
37964 Opc = X86::TCVTROWPS2BF16Lrre;
37966 case X86::PTCVTROWPS2PHHrre:
37967 Opc = X86::TCVTROWPS2PHHrre;
37969 case X86::PTCVTROWPS2PHLrre:
37970 Opc = X86::TCVTROWPS2PHLrre;
37972 case X86::PTILEMOVROWrre:
37973 Opc = X86::TILEMOVROWrre;
37977 MIB.
add(
MI.getOperand(0));
37979 MIB.
add(
MI.getOperand(2));
37981 MI.eraseFromParent();
37994 const APInt &DemandedElts,
37996 EVT VT =
Op.getValueType();
37997 unsigned Opcode =
Op.getOpcode();
38004 auto NeedsSignExtension = [&](
SDValue V,
unsigned ActiveBits) {
38007 for (
unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
38008 if (!DemandedElts[i] || V.getOperand(i).isUndef())
38010 const APInt &Val = V.getConstantOperandAPInt(i);
38020 if (EltSize > ActiveBits && EltSize > 1 &&
isTypeLegal(VT) &&
38022 NeedsSignExtension(
Op.getOperand(1), ActiveBits)) {
38046 const APInt &Mask =
C->getAPIntValue();
38061 Width = std::min(Width, EltSize);
38068 if (ZeroExtendMask == Mask)
                                       const APInt &DemandedElts,
  unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
  Known = Known.zext(64);
                                        const APInt &DemandedElts,
  unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
  APInt DemandedLoElts =
  APInt DemandedHiElts =
                                          const APInt &DemandedElts,
  unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
  APInt DemandedLoElts =
  APInt DemandedHiElts =
  APInt DemandedEltsLHS, DemandedEltsRHS;
                              DemandedElts, DemandedEltsLHS,
  const auto ComputeForSingleOpFunc =
        return KnownBitsFunc(
  if (DemandedEltsRHS.isZero())
    return ComputeForSingleOpFunc(Op.getOperand(0), DemandedEltsLHS);
  if (DemandedEltsLHS.isZero())
    return ComputeForSingleOpFunc(Op.getOperand(1), DemandedEltsRHS);
  return ComputeForSingleOpFunc(Op.getOperand(0), DemandedEltsLHS)
      .intersectWith(ComputeForSingleOpFunc(Op.getOperand(1), DemandedEltsRHS));
38173 const APInt &DemandedElts,
38175 unsigned Depth)
const {
38178 unsigned Opc =
Op.getOpcode();
38179 EVT VT =
Op.getValueType();
38184 "Should use MaskedValueIsZero if you don't know whether Op"
38185 " is a target node!");
38207 unsigned NumLoBits =
Op.getOperand(0).getValueType().getVectorNumElements();
38214 EVT SrcVT = Src.getValueType();
38216 Op.getConstantOperandVal(1));
38225 unsigned ShAmt =
Op.getConstantOperandVal(1);
38239 Known.
Zero <<= ShAmt;
38240 Known.
One <<= ShAmt;
38256 APInt DemandedLHS, DemandedRHS;
38263 if (!!DemandedLHS) {
38267 if (!!DemandedRHS) {
38292 if (!Src.getSimpleValueType().isVector()) {
38299 if (
Op.getResNo() == 0) {
38329 LHS.getValueType() ==
RHS.getValueType() &&
38330 LHS.getValueType().getScalarType() == MVT::i8 &&
38331 "Unexpected PSADBW types");
38356 LHS.getValueType() ==
RHS.getValueType() &&
38357 LHS.getValueType().getVectorElementType() == MVT::i16 &&
38358 "Unexpected PMADDWD types");
38366 LHS.getValueType() ==
RHS.getValueType() &&
38367 LHS.getValueType().getVectorElementType() == MVT::i8 &&
38368 "Unexpected PMADDUBSW types");
38398 if (
auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
38399 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
38400 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
38457 EVT SrcVT =
Op.getOperand(0).getValueType();
38460 if (NumElts > NumSrcElts && DemandedElts.
countr_zero() >= NumSrcElts)
38472 EVT SrcVT =
Op.getOperand(1).getValueType();
38475 if (NumElts > NumSrcElts && DemandedElts.
countr_zero() >= NumSrcElts)
38494 for (
unsigned I = 0;
I != NumElts; ++
I) {
38495 if (!DemandedElts[
I])
38497 if (UndefElts[
I]) {
38511 Op, DemandedElts,
Depth, DAG,
38515 KnownLHS, KnownRHS);
38520 switch (
Op->getConstantOperandVal(0)) {
38521 case Intrinsic::x86_sse2_pmadd_wd:
38522 case Intrinsic::x86_avx2_pmadd_wd:
38523 case Intrinsic::x86_avx512_pmaddw_d_512: {
38527 LHS.getValueType() ==
RHS.getValueType() &&
38528 LHS.getValueType().getScalarType() == MVT::i16 &&
38529 "Unexpected PMADDWD types");
38533 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
38534 case Intrinsic::x86_avx2_pmadd_ub_sw:
38535 case Intrinsic::x86_avx512_pmaddubs_w_512: {
38539 LHS.getValueType() ==
RHS.getValueType() &&
38540 LHS.getValueType().getScalarType() == MVT::i8 &&
38541 "Unexpected PMADDUBSW types");
38545 case Intrinsic::x86_sse2_psad_bw:
38546 case Intrinsic::x86_avx2_psad_bw:
38547 case Intrinsic::x86_avx512_psad_bw_512: {
38551 LHS.getValueType() ==
RHS.getValueType() &&
38552 LHS.getValueType().getScalarType() == MVT::i8 &&
38553 "Unexpected PSADBW types");
38568 unsigned NumOps = Ops.
size();
38570 if (Mask.size() == NumElts) {
38573 for (
unsigned i = 0; i != NumElts; ++i) {
38574 if (!DemandedElts[i])
38587 assert(0 <= M && (
unsigned)M < (NumOps * NumElts) &&
38588 "Shuffle index out of range");
38590 unsigned OpIdx = (
unsigned)M / NumElts;
38591 unsigned EltIdx = (
unsigned)M % NumElts;
38597 DemandedOps[OpIdx].setBit(EltIdx);
38600 for (
unsigned i = 0; i != NumOps && !Known.
isUnknown(); ++i) {
38601 if (!DemandedOps[i])
38614 unsigned Depth)
const {
38615 EVT VT =
Op.getValueType();
38617 unsigned Opcode =
Op.getOpcode();
38625 MVT SrcVT = Src.getSimpleValueType();
38627 assert(VTBits < NumSrcBits &&
"Illegal truncation input type");
38630 if (Tmp > (NumSrcBits - VTBits))
38631 return Tmp - (NumSrcBits - VTBits);
38637 APInt DemandedLHS, DemandedRHS;
38643 auto NumSignBitsPACKSS = [&](
SDValue V,
const APInt &Elts) ->
unsigned {
38647 V.getScalarValueSizeInBits() == 32) {
38659 unsigned SrcBits =
Op.getOperand(0).getScalarValueSizeInBits();
38660 unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
38662 Tmp0 = NumSignBitsPACKSS(
Op.getOperand(0), DemandedLHS);
38664 Tmp1 = NumSignBitsPACKSS(
Op.getOperand(1), DemandedRHS);
38665 unsigned Tmp = std::min(Tmp0, Tmp1);
38666 if (Tmp > (SrcBits - VTBits))
38667 return Tmp - (SrcBits - VTBits);
38673 if (!Src.getSimpleValueType().isVector())
38680 const APInt &ShiftVal =
Op.getConstantOperandAPInt(1);
38681 if (ShiftVal.
uge(VTBits))
38684 if (ShiftVal.
uge(Tmp))
38691 APInt ShiftVal =
Op.getConstantOperandAPInt(1);
38692 if (ShiftVal.
uge(VTBits - 1))
38701 if (VT == MVT::f32 || VT == MVT::f64 ||
38702 ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))
38717 if (Tmp0 == 1)
return 1;
38720 return std::min(Tmp0, Tmp1);
38725 if (Tmp0 == 1)
return 1;
38727 return std::min(Tmp0, Tmp1);
38737 unsigned NumOps = Ops.
size();
38739 if (Mask.size() == NumElts) {
38741 for (
unsigned i = 0; i != NumElts; ++i) {
38742 if (!DemandedElts[i])
38753 assert(0 <= M && (
unsigned)M < (NumOps * NumElts) &&
38754 "Shuffle index out of range");
38756 unsigned OpIdx = (
unsigned)M / NumElts;
38757 unsigned EltIdx = (
unsigned)M % NumElts;
38762 DemandedOps[OpIdx].setBit(EltIdx);
38764 unsigned Tmp0 = VTBits;
38765 for (
unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
38766 if (!DemandedOps[i])
38770 Tmp0 = std::min(Tmp0, Tmp1);
38783 return N->getOperand(0);
38806 bool AllowFloatDomain,
bool AllowIntDomain,
38809 MVT &SrcVT,
MVT &DstVT) {
38810 unsigned NumMaskElts = Mask.size();
38814 if (Mask[0] == 0 &&
38815 (MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
38820 if (MaskEltSize == 16)
38823 SrcVT = DstVT = !Subtarget.
hasSSE2() ? MVT::v4f32 : MaskVT;
38832 unsigned MaxScale = 64 / MaskEltSize;
38835 for (
unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
38836 bool MatchAny =
true;
38837 bool MatchZero =
true;
38838 bool MatchSign = UseSign;
38839 unsigned NumDstElts = NumMaskElts / Scale;
38840 for (
unsigned i = 0;
38841 i != NumDstElts && (MatchAny || MatchSign || MatchZero); ++i) {
38843 MatchAny = MatchSign = MatchZero =
false;
38846 unsigned Pos = (i * Scale) + 1;
38847 unsigned Len = Scale - 1;
38852 if (MatchAny || MatchSign || MatchZero) {
38853 assert((MatchSign || MatchZero) &&
38854 "Failed to match sext/zext but matched aext?");
38855 unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
38874 if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.
hasSSE2()) ||
38875 (MaskEltSize == 16 && Subtarget.hasFP16())) &&
38879 if (MaskEltSize == 16)
38882 SrcVT = DstVT = !Subtarget.
hasSSE2() ? MVT::v4f32 : MaskVT;
38892 SrcVT = DstVT = MVT::v2f64;
38897 SrcVT = DstVT = MVT::v4f32;
38902 SrcVT = DstVT = MVT::v4f32;
38908 assert(Subtarget.
hasAVX() &&
"AVX required for 256-bit vector shuffles");
38911 SrcVT = DstVT = MVT::v4f64;
38914 if (
isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
38917 SrcVT = DstVT = MVT::v8f32;
38920 if (
isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG,
38923 SrcVT = DstVT = MVT::v8f32;
38930 "AVX512 required for 512-bit vector shuffles");
38931 if (
isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
38934 SrcVT = DstVT = MVT::v8f64;
38939 {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) {
38941 SrcVT = DstVT = MVT::v16f32;
38946 {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) {
38948 SrcVT = DstVT = MVT::v16f32;
38960 const APInt &Zeroable,
38961 bool AllowFloatDomain,
bool AllowIntDomain,
38964 unsigned &Shuffle,
MVT &ShuffleVT,
38965 unsigned &PermuteImm) {
38966 unsigned NumMaskElts = Mask.size();
38968 unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
38973 if (!ContainsZeros && MaskScalarSizeInBits == 64) {
38979 ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
38987 ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
38992 }
else if (AllowFloatDomain && Subtarget.
hasAVX()) {
38997 for (
int i = 0, e = Mask.size(); i != e; ++i) {
39001 assert(((M / 2) == (i / 2)) &&
"Out of range shuffle mask index");
39002 PermuteImm |= (M & 1) << i;
39010 for (
unsigned Order = 0; Order < 2; ++Order) {
39011 if (Subtarget.preferLowerShuffleAsShift() ? (Order == 1) : (Order == 0)) {
39015 if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
39016 !ContainsZeros && (AllowIntDomain || Subtarget.
hasAVX())) {
39021 if (MaskScalarSizeInBits == 64)
39025 ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
39033 if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
39055 int OffsetHiMask[4];
39056 for (
int i = 0; i != 4; ++i)
39057 OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
39068 if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
39073 if (0 < RotateAmt) {
39081 if (AllowIntDomain &&
39087 Zeroable, Subtarget);
39088 if (0 < ShiftAmt && (!ShuffleVT.
is512BitVector() || Subtarget.hasBWI() ||
39109 bool AllowFloatDomain,
bool AllowIntDomain,
39112 unsigned &Shuffle,
MVT &SrcVT,
MVT &DstVT,
39114 unsigned NumMaskElts = Mask.size();
39120 AllowFloatDomain) {
39124 SrcVT = DstVT = Subtarget.
hasSSE2() ? MVT::v2f64 : MVT::v4f32;
39128 AllowFloatDomain) {
39131 SrcVT = DstVT = Subtarget.
hasSSE2() ? MVT::v2f64 : MVT::v4f32;
39135 Subtarget.
hasSSE2() && (AllowFloatDomain || !Subtarget.
hasSSE41())) {
39138 SrcVT = DstVT = MVT::v2f64;
39142 (AllowFloatDomain || !Subtarget.
hasSSE41())) {
39144 SrcVT = DstVT = MVT::v4f32;
39147 if (
isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7},
39149 Subtarget.hasFP16()) {
39151 SrcVT = DstVT = MVT::v8f16;
39157 if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.
hasSSE2()) ||
39158 ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.
hasInt256()) ||
39159 ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
39167 if (MaskVT == MVT::v4i32 && Subtarget.
hasSSE2() &&
39170 V2.getScalarValueSizeInBits() == 64) {
39174 if (Subtarget.
hasSSE41() && MinLZV1 >= 48 && MinLZV2 >= 48) {
39175 SrcVT = MVT::v4i32;
39176 DstVT = MVT::v8i16;
39181 if (MinLZV1 >= 56 && MinLZV2 >= 56) {
39182 SrcVT = MVT::v8i16;
39183 DstVT = MVT::v16i8;
39189 SrcVT = MVT::v4i32;
39190 DstVT = MVT::v8i16;
39197 if ((MaskVT == MVT::v4f32 && Subtarget.
hasSSE1()) ||
39202 (32 <= EltSizeInBits || Subtarget.hasBWI()))) {
39205 SrcVT = DstVT = MaskVT;
39207 SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
39216 SizeInBits == V2.getValueSizeInBits() &&
39218 (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
39219 bool IsBlend =
true;
39221 unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
39222 unsigned Scale1 = NumV1Elts / NumMaskElts;
39223 unsigned Scale2 = NumV2Elts / NumMaskElts;
39226 for (
unsigned i = 0; i != NumMaskElts; ++i) {
39231 DemandedZeroV1.
setBits(i * Scale1, (i + 1) * Scale1);
39232 DemandedZeroV2.
setBits(i * Scale2, (i + 1) * Scale2);
39236 DemandedZeroV2.
setBits(i * Scale2, (i + 1) * Scale2);
39239 if (M == (
int)(i + NumMaskElts)) {
39240 DemandedZeroV1.
setBits(i * Scale1, (i + 1) * Scale1);
39253 if (NumV1Elts == NumV2Elts && NumV1Elts == NumMaskElts) {
39257 auto computeKnownBitsElementWise = [&DAG](
SDValue V) {
39258 unsigned NumElts = V.getValueType().getVectorNumElements();
39260 for (
unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
39263 if (PeepholeKnown.
isZero())
39271 KnownBits V1Known = computeKnownBitsElementWise(V1);
39272 KnownBits V2Known = computeKnownBitsElementWise(V2);
39274 for (
unsigned i = 0; i != NumMaskElts && IsBlend; ++i) {
39279 IsBlend &= V1Known.
Zero[i] && V2Known.
Zero[i];
39283 IsBlend &= V2Known.
Zero[i] || V1Known.
One[i];
39286 if (M == (
int)(i + NumMaskElts)) {
39287 IsBlend &= V1Known.
Zero[i] || V2Known.
One[i];
39306 bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1,
SDValue &V2,
39308 unsigned &Shuffle,
MVT &ShuffleVT,
unsigned &PermuteImm) {
39309 unsigned NumMaskElts = Mask.size();
39313 if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) &&
39319 if (0 < Rotation) {
39321 if (EltSizeInBits == 64)
39325 PermuteImm = Rotation;
39336 if (0 < ByteRotation) {
39339 PermuteImm = ByteRotation;
39347 (MaskVT == MVT::v16i16 && Subtarget.
hasAVX2())) {
39349 bool ForceV1Zero =
false, ForceV2Zero =
false;
39352 ForceV2Zero, BlendMask)) {
39353 if (MaskVT == MVT::v16i16) {
39359 "Repeated mask size doesn't match!");
39361 for (
int i = 0; i < 8; ++i)
39362 if (RepeatedMask[i] >= 8)
39363 PermuteImm |= 1 << i;
39367 ShuffleVT = MaskVT;
39375 ShuffleVT = MaskVT;
39383 if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.
hasSSE41() &&
39387 ShuffleVT = MVT::v4f32;
39392 if (AllowFloatDomain && EltSizeInBits == 64 &&
39396 bool ForceV1Zero =
false, ForceV2Zero =
false;
39398 PermuteImm, Mask, Zeroable)) {
39408 if (AllowFloatDomain && EltSizeInBits == 32 &&
39416 auto MatchHalf = [&](
unsigned Offset,
int &S0,
int &
S1) {
39418 int M1 = RepeatedMask[
Offset + 1];
39439 int ShufMask[4] = {-1, -1, -1, -1};
39440 SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
39441 SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
39455 if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.
hasSSE41() &&
39459 ShuffleVT = MVT::v4f32;
39468 bool HasVariableMask,
bool AllowVariableCrossLaneMask,
39483 bool HasVariableMask,
39484 bool AllowVariableCrossLaneMask,
39485 bool AllowVariablePerLaneMask,
39488 assert(!BaseMask.
empty() &&
"Cannot combine an empty shuffle mask!");
39490 "Unexpected number of shuffle inputs!");
39498 auto CanonicalizeShuffleInput = [&](
MVT VT,
SDValue Op) {
39508 bool UnaryShuffle = (Inputs.
size() == 1);
39514 MVT VT2 = V2.getSimpleValueType();
39516 (RootSizeInBits % VT2.
getSizeInBits()) == 0 &&
"Vector size mismatch");
39520 unsigned NumBaseMaskElts = BaseMask.
size();
39521 if (NumBaseMaskElts == 1) {
39522 assert(BaseMask[0] == 0 &&
"Invalid shuffle index found!");
39523 return CanonicalizeShuffleInput(RootVT, V1);
39527 unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
39535 bool IsMaskedShuffle =
false;
39536 if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
39539 IsMaskedShuffle =
true;
39546 if (UnaryShuffle && !
isAnyZero(BaseMask) &&
39550 return CanonicalizeShuffleInput(RootVT, V1);
39560 if (Mask.size() <= NumElts &&
39562 for (
unsigned i = 0; i != NumElts; ++i)
39566 return CanonicalizeShuffleInput(RootVT, V1);
39572 (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
39580 "Unexpected lane shuffle");
39581 Res = CanonicalizeShuffleInput(RootVT, V1);
39582 unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
39590 assert((BaseMaskEltSizeInBits % 128) == 0 &&
"Illegal mask size");
39594 auto MatchSHUF128 = [&](
MVT ShuffleVT,
const SDLoc &
DL,
39597 int PermMask[4] = {-1, -1, -1, -1};
39600 for (
int i = 0; i < 4; ++i) {
39601 assert(ScaledMask[i] >= -1 &&
"Illegal shuffle sentinel value");
39602 if (ScaledMask[i] < 0)
39605 SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
39612 PermMask[i] = ScaledMask[i] % 4;
39616 CanonicalizeShuffleInput(ShuffleVT, Ops[0]),
39617 CanonicalizeShuffleInput(ShuffleVT, Ops[1]),
39624 bool PreferPERMQ = UnaryShuffle &&
isUndefOrInRange(ScaledMask[0], 0, 2) &&
39628 (ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
39629 ScaledMask[0] == (ScaledMask[2] % 2)) &&
39630 (ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
39631 ScaledMask[1] == (ScaledMask[3] % 2));
39633 if (!
isAnyZero(ScaledMask) && !PreferPERMQ) {
39636 MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
39637 if (
SDValue V = MatchSHUF128(ShuffleVT,
DL, ScaledMask, V1, V2, DAG))
39651 Res = CanonicalizeShuffleInput(RootVT, V1);
39660 if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
39664 SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
39665 SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
39676 if (UnaryShuffle &&
39679 unsigned PermMask = 0;
39680 PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
39681 PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
39691 if (!UnaryShuffle && !IsMaskedShuffle) {
39693 "Unexpected shuffle sentinel value");
39695 if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
39696 unsigned PermMask = 0;
39697 PermMask |= ((Mask[0] & 3) << 0);
39698 PermMask |= ((Mask[1] & 3) << 4);
39702 CanonicalizeShuffleInput(RootVT,
LHS),
39703 CanonicalizeShuffleInput(RootVT,
RHS),
39711 if (BaseMaskEltSizeInBits > 64) {
39712 assert((BaseMaskEltSizeInBits % 64) == 0 &&
"Illegal mask size");
39713 int MaskScale = BaseMaskEltSizeInBits / 64;
39716 Mask = std::move(ScaledMask);
39722 if (IsMaskedShuffle && NumRootElts > Mask.size()) {
39723 assert((NumRootElts % Mask.size()) == 0 &&
"Illegal mask size");
39724 int MaskScale = NumRootElts / Mask.size();
39727 Mask = std::move(ScaledMask);
39730 unsigned NumMaskElts = Mask.size();
39731 unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
39735 FloatDomain &= (32 <= MaskEltSizeInBits);
39745 MVT ShuffleSrcVT, ShuffleVT;
39746 unsigned Shuffle, PermuteImm;
39751 bool AllowFloatDomain = FloatDomain || (
Depth >= 3);
39752 bool AllowIntDomain = (!FloatDomain || (
Depth >= 3)) && Subtarget.
hasSSE2() &&
39756 APInt KnownUndef, KnownZero;
39758 APInt Zeroable = KnownUndef | KnownZero;
39760 if (UnaryShuffle) {
39764 (Subtarget.
hasAVX() && 32 <= MaskEltSizeInBits)) &&
39765 (!IsMaskedShuffle || NumRootElts == NumMaskElts)) {
39779 Res = CanonicalizeShuffleInput(MaskVT, V1);
39787 DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
39788 (!IsMaskedShuffle ||
39792 Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39793 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, Res);
39798 AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
39800 (!IsMaskedShuffle ||
39804 Res = CanonicalizeShuffleInput(ShuffleVT, V1);
39805 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, Res,
39814 if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
39817 if (MaskEltSizeInBits == 32) {
39818 SDValue SrcV1 = V1, SrcV2 = V2;
39825 CanonicalizeShuffleInput(MVT::v4f32, SrcV1),
39826 CanonicalizeShuffleInput(MVT::v4f32, SrcV2),
39831 if (MaskEltSizeInBits == 64 &&
39834 V2.getScalarValueSizeInBits() <= 32) {
39837 PermuteImm = ( 2 << 4) | ( 0 << 0);
39839 CanonicalizeShuffleInput(MVT::v4f32, V1),
39840 CanonicalizeShuffleInput(MVT::v4f32, V2),
39849 NewV2,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
39850 ShuffleVT, UnaryShuffle) &&
39854 NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
39855 NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2);
39856 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, NewV1, NewV2);
39863 AllowIntDomain, NewV1, NewV2,
DL, DAG,
39864 Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
39868 NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1);
39869 NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2);
39870 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, NewV1, NewV2,
39880 if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
39886 V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
39896 V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
39897 V2 = CanonicalizeShuffleInput(IntMaskVT, V2);
39906 if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.
hasAVX512()) {
39916 V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39917 Res = DAG.
getNode(Opc,
DL, ShuffleVT, V1);
39924 if (RootSizeInBits < 512 &&
39927 (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
39937 V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39938 V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2);
39953 int VariableCrossLaneShuffleDepth =
39954 Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
39955 int VariablePerLaneShuffleDepth =
39956 Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2;
39957 AllowVariableCrossLaneMask &=
39958 (
Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask;
39959 AllowVariablePerLaneMask &=
39960 (
Depth >= VariablePerLaneShuffleDepth) || HasVariableMask;
39963 bool AllowBWIVPERMV3 =
39964 (
Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask);
39968 AllowVariableCrossLaneMask = AllowVariablePerLaneMask =
true;
39970 bool MaskContainsZeros =
isAnyZero(Mask);
39974 if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) {
39976 (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) {
39978 Res = CanonicalizeShuffleInput(MaskVT, V1);
39984 (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
39985 MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
39986 (Subtarget.hasBWI() &&
39987 (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
39988 (Subtarget.hasVBMI() &&
39989 (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) {
39990 V1 = CanonicalizeShuffleInput(MaskVT, V1);
39999 if (UnaryShuffle && AllowVariableCrossLaneMask &&
40001 (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
40002 MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
40003 MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 ||
40004 MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
40005 (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
40006 (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
40007 (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
40008 (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
40010 for (
unsigned i = 0; i != NumMaskElts; ++i)
40012 Mask[i] = NumMaskElts + i;
40013 V1 = CanonicalizeShuffleInput(MaskVT, V1);
40022 Inputs, Root, BaseMask,
Depth, HasVariableMask,
40023 AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
40025 return WideShuffle;
40029 if (AllowVariableCrossLaneMask && !MaskContainsZeros &&
40031 (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
40032 MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
40033 MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 ||
40034 MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
40035 (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
40036 (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
40037 (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
40038 (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
40039 V1 = CanonicalizeShuffleInput(MaskVT, V1);
40040 V2 = CanonicalizeShuffleInput(MaskVT, V2);
40049 if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask &&
40054 APInt UndefElts(NumMaskElts, 0);
40056 for (
unsigned i = 0; i != NumMaskElts; ++i) {
40067 Res = CanonicalizeShuffleInput(MaskVT, V1);
40068 unsigned AndOpcode =
40070 Res = DAG.
getNode(AndOpcode,
DL, MaskVT, Res, BitMask);
40077 if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
40078 ((MaskVT == MVT::v8f32 && Subtarget.
hasAVX()) ||
40079 (MaskVT == MVT::v16f32 && Subtarget.
hasAVX512()))) {
40081 for (
int M : Mask) {
40087 Res = CanonicalizeShuffleInput(MaskVT, V1);
40094 if (AllowVariablePerLaneMask && Subtarget.hasXOP() &&
40095 (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
40096 MaskVT == MVT::v8f32)) {
40102 unsigned NumEltsPerLane = NumMaskElts / NumLanes;
40104 unsigned M2ZImm = 0;
40105 for (
int M : Mask) {
40115 int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
40119 V1 = CanonicalizeShuffleInput(MaskVT, V1);
40120 V2 = CanonicalizeShuffleInput(MaskVT, V2);
40132 if (UnaryShuffle && AllowVariablePerLaneMask &&
40138 int Ratio = NumBytes / NumMaskElts;
40139 for (
int i = 0; i < NumBytes; ++i) {
40140 int M = Mask[i / Ratio];
40149 M = Ratio * M + i % Ratio;
40150 assert((M / 16) == (i / 16) &&
"Lane crossing detected");
40154 Res = CanonicalizeShuffleInput(ByteVT, V1);
      Subtarget.hasXOP()) {
    int Ratio = NumBytes / NumMaskElts;
    for (int i = 0; i < NumBytes; ++i) {
      int M = Mask[i / Ratio];
      M = Ratio * M + i % Ratio;
    MVT ByteVT = MVT::v16i8;
    V1 = CanonicalizeShuffleInput(ByteVT, V1);
    V2 = CanonicalizeShuffleInput(ByteVT, V2);
          Inputs, Root, BaseMask, Depth, HasVariableMask,
          AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
    return WideShuffle;
40201 if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
40203 (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 ||
40204 MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 ||
40205 MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 ||
40206 MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 ||
40207 MaskVT == MVT::v16i32)) ||
40208 (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
40209 (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 ||
40210 MaskVT == MVT::v32i16)) ||
40211 (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
40212 (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 ||
40213 MaskVT == MVT::v64i8)))) {
40214 V1 = CanonicalizeShuffleInput(MaskVT, V1);
40215 V2 = CanonicalizeShuffleInput(MaskVT, V2);
    bool HasVariableMask, bool AllowVariableCrossLaneMask,
  unsigned NumMaskElts = BaseMask.size();
  unsigned NumInputs = Inputs.size();
  if (NumInputs == 0)
  unsigned RootEltSizeInBits = RootSizeInBits / NumMaskElts;
  assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
  unsigned WideSizeInBits = RootSizeInBits;
  for (SDValue Input : Inputs) {
        Input.getOperand(0).isUndef()) {
        WideSizeInBits < Input.getValueSizeInBits())
      WideSizeInBits = Input.getValueSizeInBits();
  unsigned Scale = WideSizeInBits / RootSizeInBits;
  if (WideSizeInBits <= RootSizeInBits ||
      (WideSizeInBits % RootSizeInBits) != 0)
  for (int &M : WideMask) {
    M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
  int AdjustedMasks = 0;
  for (unsigned I = 0; I != NumInputs; ++I) {
      Idx = (Idx * InputEltSizeInBits) / RootEltSizeInBits;
      int lo = I * WideMask.size();
      int hi = (I + 1) * WideMask.size();
      for (int &M : WideMask)
        if (lo <= M && M < hi)
  assert(!WideInputs.empty() && "Shuffle with no inputs detected");
  if (AdjustedMasks == 0 || WideInputs.size() > 2)
  while (WideMask.size() > 1) {
    WideMask = std::move(WidenedMask);
    std::swap(WideInputs[0], WideInputs[1]);
  Depth += AdjustedMasks;
  SDValue WideRoot = WideInputs.front().getValueSizeInBits() >
                             WideInputs.back().getValueSizeInBits()
                         ? WideInputs.front()
                         : WideInputs.back();
         "WideRootSize mismatch");
          HasVariableMask, AllowVariableCrossLaneMask,
          AllowVariablePerLaneMask, DAG, Subtarget)) {
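  // The widest input is used as the new root so the chain is recombined at
  // the wider width; the original root width is extracted back out of the
  // recombined result.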
  if (Mask.empty() || Ops.empty())
    return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
  if (!isHoriz && !isPack)
    return Op.hasOneUse() &&
  int NumEltsPerLane = NumElts / NumLanes;
  int NumHalfEltsPerLane = NumEltsPerLane / 2;
  unsigned EltSizeInBits = RootSizeInBits / Mask.size();
  if (NumEltsPerLane >= 4 &&
    auto GetHOpSrc = [&](int M) {
    SDValue M2 = GetHOpSrc(ScaledMask[2]);
    SDValue M3 = GetHOpSrc(ScaledMask[3]);
    if (M0 && M1 && M2 && M3) {
  if (Ops.size() >= 2) {
    auto GetHOpSrc = [&](int M, int &OutM) {
      SDValue Src = BC[M / 4].getOperand((M % 4) >= 2);
      if (!LHS || LHS == Src) {
      if (!RHS || RHS == Src) {
        OutM = (M % 2) + 2;
    int PostMask[4] = {-1, -1, -1, -1};
    if (GetHOpSrc(ScaledMask[0], PostMask[0]) &&
        GetHOpSrc(ScaledMask[1], PostMask[1]) &&
        GetHOpSrc(ScaledMask[2], PostMask[2]) &&
        GetHOpSrc(ScaledMask[3], PostMask[3])) {
  if (2 < Ops.size())
  if (Ops.size() == 2) {
      for (int &M : Mask) {
        int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0;
        M -= NumElts + (SubLane * NumHalfEltsPerLane);
          M += NumHalfEltsPerLane;
      for (int i = 0; i != NumElts; ++i) {
            (M % NumEltsPerLane) >= NumHalfEltsPerLane)
          M -= NumHalfEltsPerLane;
            (M % NumEltsPerLane) >= NumHalfEltsPerLane)
          M -= NumHalfEltsPerLane;
    bool SingleOp = (Ops.size() == 1);
    if (isPack || OneUseOps ||
      Lo = Lo.getOperand(WideMask128[0] & 1);
      Hi = Hi.getOperand(WideMask128[1] & 1);
  if (Ops.size() == 1 && NumLanes == 2 &&
    int M0 = WideMask64[0];
    int M1 = WideMask64[1];
    bool HasVariableMask,
  unsigned NumMaskElts = Mask.size();
  unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
  unsigned NumOps = Ops.size();
  for (unsigned I = 0; I != NumOps; ++I)
  if (IsOptimizingSize && !HasVariableMask &&
  APInt UndefElts(NumMaskElts, 0);
  APInt ZeroElts(NumMaskElts, 0);
  APInt ConstantElts(NumMaskElts, 0);
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    assert(0 <= M && M < (int)(NumMaskElts * NumOps));
    unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
    unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
    auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
    if (SrcUndefElts[SrcMaskIdx]) {
    auto &SrcEltBits = RawBitsOps[SrcOpIdx];
    APInt &Bits = SrcEltBits[SrcMaskIdx];
    ConstantBitData[i] = Bits;
  assert((UndefElts | ZeroElts | ConstantElts).isAllOnes());
  if ((UndefElts | ZeroElts).isAllOnes())
  if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
    unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
         (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
         "Illegal shuffle root mask");
  assert(RootVT.isVector() && "Shuffles operate on vector types!");
  if (Depth >= MaxDepth)
  EVT VT = Op.getValueType();
         "Can only combine shuffles upto size of the root op.");
  for (int M : RootMask) {
    int BaseIdx = RootMask.size() * SrcOpIndex;
      OpDemandedElts.setBit(M - BaseIdx);
    unsigned NumOpMaskElts = RootMask.size() / Scale;
    assert((RootMask.size() % Scale) == 0 && "Root mask size mismatch");
           "Out of range elements referenced in root mask");
    OpDemandedElts = OpDemandedElts.extractBits(NumOpMaskElts, 0);
  APInt OpUndef, OpZero;
                              OpZero, DAG, Depth, false)) {
        (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
      int ExtractIdx = Op.getConstantOperandVal(1);
      OpInputs.assign({SrcVec});
      std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
    unsigned OpMaskSize = OpMask.size();
    if (OpInputs.size() > 1) {
      unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
      for (int &M : OpMask) {
        int EltIdx = M % OpMaskSize;
        int OpIdx = M / OpMaskSize;
        M = (PaddedMaskSize * OpIdx) + EltIdx;
      OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
      OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
  bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
  bool ResolveKnownZeros = true;
    for (int i = 0, e = OpMask.size(); i != e; ++i) {
        ResolveKnownZeros = false;
                                    ResolveKnownZeros);
  for (int i = 0, e = Ops.size(); i < e; ++i)
    return Ops.size() - 1;
  for (SDValue OpInput : OpInputs)
        AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
           RootMask.size() % OpMask.size() == 0) ||
          (OpMask.size() > RootMask.size() &&
           OpMask.size() % RootMask.size() == 0) ||
          OpMask.size() == RootMask.size()) &&
         "The smaller number of elements must divide the larger.");
  assert(llvm::has_single_bit<uint32_t>(RootMask.size()) &&
         "Non-power-of-2 shuffle mask sizes");
  assert(llvm::has_single_bit<uint32_t>(OpMask.size()) &&
         "Non-power-of-2 shuffle mask sizes");
  unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
  unsigned RootRatio =
      std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
  unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
  assert((RootRatio == 1 || OpRatio == 1) &&
         "Must not have a ratio for both incoming and op masks!");
  for (unsigned i = 0; i < MaskWidth; ++i) {
    unsigned RootIdx = i >> RootRatioLog2;
    if (RootMask[RootIdx] < 0) {
      Mask[i] = RootMask[RootIdx];
    unsigned RootMaskedIdx =
            ? RootMask[RootIdx]
            : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
    if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
        (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
      Mask[i] = RootMaskedIdx;
    RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
    unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
    if (OpMask[OpIdx] < 0) {
      Mask[i] = OpMask[OpIdx];
    unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx]
                                        : (OpMask[OpIdx] << OpRatioLog2) +
                                              (RootMaskedIdx & (OpRatio - 1));
    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
    int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
    assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
    OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
    Mask[i] = OpMaskedIdx;
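    // Each merged element first resolves through the root mask: sentinels and
    // indices belonging to a different root operand are kept as-is, otherwise
    // the index is looked up in the op mask and rebased onto the combined
    // operand list via OpInputIdx.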
  for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
      Op = Op.getOperand(1);
      unsigned Scale = RootSizeInBits / Op.getValueSizeInBits();
      int Lo = I * Mask.size();
      int Hi = (I + 1) * Mask.size();
      int NewHi = Lo + (Mask.size() / Scale);
      for (int &M : Mask) {
        if (Lo <= M && NewHi <= M && M < Hi)
        (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
      Op = Op.getOperand(0);
  assert(!Ops.empty() && "Shuffle with no inputs detected");
40980 HasVariableMask |= IsOpVariableMask;
  for (int i = 0, e = Ops.size(); i < e; ++i) {
    bool AllowCrossLaneVar = false;
    bool AllowPerLaneVar = false;
    if (Ops[i].getNode()->hasOneUse() ||
      AllowCrossLaneVar = AllowVariableCrossLaneMask;
      AllowPerLaneVar = AllowVariablePerLaneMask;
            Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
            HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
          RootVT, Ops, Mask, HasVariableMask, DAG, DL, Subtarget))
  unsigned EltSizeInBits = RootSizeInBits / Mask.size();
          Ops, Mask, RootSizeInBits, DL, DAG, Subtarget))
    int OpIdx = I.index();
    int Lo = OpIdx * Mask.size();
    int Hi = Lo + Mask.size();
    APInt OpDemandedElts(Mask.size(), 0);
    for (int MaskElt : Mask) {
        int OpEltIdx = MaskElt - Lo;
        OpDemandedElts.setBit(OpEltIdx);
    if (Op.getValueSizeInBits() < RootSizeInBits) {
      unsigned NumExpectedVectorElts = Mask.size();
      unsigned EltSizeInBits = RootSizeInBits / NumExpectedVectorElts;
      unsigned NumOpVectorElts = Op.getValueSizeInBits() / EltSizeInBits;
                 NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&
             "Demanding the virtual undef widening padding?");
      OpDemandedElts = OpDemandedElts.trunc(NumOpVectorElts);
    unsigned NumOpElts = Op.getValueType().getVectorNumElements();
            Op, OpScaledDemandedElts, DAG))
        return Op.getValueSizeInBits() < RootSizeInBits;
      if (Op.getValueSizeInBits() < RootSizeInBits)
  if (Ops.size() <= 2) {
    while (Mask.size() > 1) {
      Mask = std::move(WidenedMask);
            Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
            AllowVariablePerLaneMask, DAG, Subtarget))
  if (Ops.size() != 2 || !Subtarget.hasAVX2() || RootSizeInBits != 128 ||
      (RootSizeInBits / Mask.size()) != 64 ||
      LHS.getOperand(0) != RHS.getOperand(0))
      Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
      AllowVariablePerLaneMask, DAG, Subtarget);
  MVT VT = N.getSimpleValueType();
  for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
    for (int j = 0; j < LaneElts; ++j)
      assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
             "Mask doesn't repeat in high 128-bit lanes!");
  Mask.resize(LaneElts);
  switch (N.getOpcode()) {
    Mask.erase(Mask.begin(), Mask.begin() + 4);
    for (int &M : Mask)
         "Called with something other than an x86 128-bit half shuffle!");
41210 for (; V.hasOneUse(); V = V.getOperand(0)) {
41211 switch (V.getOpcode()) {
41227 if (Mask[0] != 0 || Mask[1] != 1 ||
41228 !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
41237 if (Mask[2] != 2 || Mask[3] != 3 ||
41238 !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
41248 if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
41249 V.getSimpleValueType().getVectorElementType() != MVT::i16)
41253 unsigned CombineOp =
41255 if (V.getOperand(0) != V.getOperand(1) ||
41256 !V->isOnlyUserOf(V.getOperand(0).getNode()))
41259 V = V.getOperand(0);
41261 switch (V.getOpcode()) {
41267 if (V.getOpcode() == CombineOp)
41274 V = V.getOperand(0);
  } while (V.hasOneUse());
  if (!V.hasOneUse())
  for (int &M : Mask)
  V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
  while (!Chain.empty()) {
    if (V.getValueType() != W.getOperand(0).getValueType())
      V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
    switch (W.getOpcode()) {
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
  if (V.getValueType() != N.getValueType())
  if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)
    if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))
    SDValue N0 = V.getOperand(0);
    SDValue N1 = V.getOperand(1);
    unsigned Imm = V.getConstantOperandVal(2);
    Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
  switch (N.getOpcode()) {
    if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) {
      unsigned Imm = N.getConstantOperandVal(1);
    unsigned Imm = N.getConstantOperandVal(2);
      if (SDValue NewSHUFP = commuteSHUFP(N, N0))
    } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) {
    } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) {
  APInt Demanded0, DemandedLHS0, DemandedRHS0;
  APInt Demanded1, DemandedLHS1, DemandedRHS1;
                              DemandedRHS0, true) ||
                              DemandedRHS1, true))
  if (!DemandedRHS0.isZero() || !DemandedRHS1.isZero() ||
  for (unsigned I = 0; I != NumElts; ++I) {
    if (Demanded0[I]) {
      int M = ScaledMask0[I];
             "BlendMask demands LHS AND RHS");
      NewBlendMask[M] = M;
      NewPermuteMask[I] = M;
    } else if (Demanded1[I]) {
      int M = ScaledMask1[I];
             "BlendMask demands LHS AND RHS");
      NewBlendMask[M] = M + NumElts;
      NewPermuteMask[I] = M;
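    // The pattern is rebuilt as a blend of the unpermuted sources followed by
    // a single permute: NewBlendMask picks lanes straight from LHS/RHS and
    // NewPermuteMask applies the remaining reordering afterwards.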
41452 if (VT == MVT::v16i16) {
  EVT ShuffleVT = N.getValueType();
  unsigned Opc = N.getOpcode();
  auto IsMergeableWithShuffle = [Opc, &DAG](SDValue Op, bool FoldShuf = true,
                                            bool FoldLoad = false) {
           (Op.getOpcode() == Opc && Op->hasOneUse()) ||
  auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
  if (N.getOperand(0).getValueType() == ShuffleVT &&
      N->isOnlyUserOf(N.getOperand(0).getNode())) {
    if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
        if (N.getNumOperands() == 2) {
          LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
          RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
    if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
            N.getNumOperands() == 2
                ? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
                : DAG.getNode(Opc, DL, ShuffleVT, Op00);
    unsigned InsertPSMask = N.getConstantOperandVal(2);
    unsigned ZeroMask = InsertPSMask & 0xF;
  if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
      N->isOnlyUserOf(N.getOperand(1).getNode())) {
        IsSafeToMoveShuffle(N0, SrcOpcode) &&
        IsSafeToMoveShuffle(N1, SrcOpcode)) {
      if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
           (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) ||
          ((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) &&
           (IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) {
        if (N.getNumOperands() == 3) {
          LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
          RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
        IsSafeToMoveShuffle(N0, SrcOpcode) &&
        IsSafeToMoveShuffle(N1, SrcOpcode)) {
        if (N.getNumOperands() == 3) {
          Res = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
          Res = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
        IsSafeToMoveShuffle(N0, SrcOpcode) &&
        IsSafeToMoveShuffle(N1, SrcOpcode)) {
                        DAG.getNode(SrcOpcode, DL, OpDstVT, Res));
41677 MVT VT = V.getSimpleValueType();
41685 if (!Src1.
isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
41694 Res = DAG.
getNode(SrcOpc0,
DL, SrcVT0, Res);
41699 if (SrcVT0 == MVT::v4f64) {
41701 if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
  using namespace SDPatternMatch;
  MVT VT = N.getSimpleValueType();
  unsigned Opcode = N.getOpcode();
    if (VT == MVT::v2f64 && Src.hasOneUse() &&
      EVT SrcVT = Src.getValueType();
      for (unsigned i = 0; i != Scale; ++i)
        DemandedMask[i] = i;
              {BC}, 0, BC, DemandedMask, {}, 0,
              true, DAG, Subtarget))
            DAG.getBitcast(SrcVT, Res));
41821 Src.getValueType().getScalarType() == Src.getOperand(0).getValueType())
41827 Src.getValueType() ==
41828 Src.getOperand(0).getValueType().getScalarType() &&
41829 TLI.
isTypeLegal(Src.getOperand(0).getValueType()))
41837 User->getValueSizeInBits(0).getFixedValue() >
41854 bool NoReplaceExtract = Src.hasOneUse();
41856 if (NoReplaceExtract) {
41869 if (SrcVT == MVT::i16 && Src.getOpcode() ==
ISD::TRUNCATE &&
41870 Src.hasOneUse() && Src.getOperand(0).hasOneUse()) {
41872 SDValue TruncIn = Src.getOperand(0);
41896 LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
41914 isa<ConstantSDNode>(TruncIn.
getOperand(1)) &&
41922 unsigned Offset = ShiftAmt / 8;
41942 MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
41957 if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
41958 SrcVT == MVT::v4i32) &&
41984 auto *LN = cast<LoadSDNode>(N0);
41998 auto *LN = cast<MemSDNode>(N0);
42001 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
42004 LN->getMemoryVT(), LN->getMemOperand());
      if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());
42046 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
42061 SDValue In = V.getOperand(1);
42063 In.getValueSizeInBits() /
42086 if ((EltBits % SrcBits) == 0 && SrcBits >= 32) {
42088 APInt BlendMask =
N.getConstantOperandAPInt(2).zextOrTrunc(NumElts);
42108 LHS.getOperand(1) !=
RHS.getOperand(1) &&
42113 "BLENDI decode mismatch");
42114 MVT ShufVT =
LHS.getSimpleValueType();
42119 ShufVT, {MaskLHS, MaskRHS}, ByteMask,
42120 true, DAG,
DL, Subtarget)) {
42122 LHS.getOperand(0), NewMask);
42124 RHS.getOperand(0), NewMask);
42126 DAG.getBitcast(VT, NewLHS),
42127 DAG.getBitcast(VT, NewRHS),
N.getOperand(2));
42138 if (VT == MVT::v4f32) {
      bool Updated = false;
      for (int i = 0; i != 2; ++i) {
          Mask[Ofs + 0] = SubScaledMask[Mask[Ofs + 0] % 4] + (i * 4);
          Mask[Ofs + 1] = SubScaledMask[Mask[Ofs + 1] % 4] + (i * 4);
        for (int &M : Mask)
    EVT SrcVT = Src.getValueType();
    uint64_t Mask = N->getConstantOperandVal(2);
    if ((Mask & 0x0A) == 0x0A &&
    if ((Mask & 0xA0) == 0xA0 &&
    if (NewLHS || NewRHS)
                         NewRHS ? NewRHS : RHS,
42217 EVT SrcVT =
LHS.getOperand(0).getValueType();
42218 if (
RHS.isUndef() || SrcVT ==
RHS.getOperand(0).getValueType()) {
42222 N->getOperand(2)));
42232 auto FindSubVector128 = [&](
unsigned Idx) {
42238 return SubOps[
Idx & 1];
42239 unsigned NumElts = Src.getValueType().getVectorNumElements();
42241 Src.getOperand(1).getValueSizeInBits() == 128 &&
42242 Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
42243 return Src.getOperand(1);
42247 unsigned Imm =
N.getConstantOperandVal(2);
42248 if (
SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
42249 if (
SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
42265 switch (V.getOpcode()) {
42274 MVT InnerVT = V.getSimpleValueType();
42279 Res = DAG.
getNode(V.getOpcode(),
DL, InnerVT, Res, V.getOperand(1));
42288 assert(Mask.size() == 4);
42315 return DAG.
getNode(Opcode,
DL, VT, N0, SclVec);
42322 assert(VT == MVT::v4f32 &&
"INSERTPS ValueType must be MVT::v4f32");
42325 unsigned InsertPSMask =
N.getConstantOperandVal(2);
42326 unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
42327 unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
42328 unsigned ZeroMask = InsertPSMask & 0xF;
42331 if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.
isUndef())
42336 if ((ZeroMask & (1u << DstIdx)) && !Op1.
isUndef())
42343 APInt KnownUndef1, KnownZero1;
42346 if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) {
42348 InsertPSMask |= (1u << DstIdx);
42353 int M = TargetMask1[SrcIdx];
42354 assert(0 <= M && M < 8 &&
"Shuffle index out of range");
42355 InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
42356 Op1 = Ops1[M < 4 ? 0 : 1];
    APInt KnownUndef0, KnownZero0;
    bool Updated = false;
    bool UseInput00 = false;
    bool UseInput01 = false;
    for (int i = 0; i != 4; ++i) {
      if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
      if (KnownUndef0[i] || KnownZero0[i]) {
        InsertPSMask |= (1u << i);
      int M = TargetMask0[i];
      if (M != i && M != (i + 4))
      UseInput00 |= (0 <= M && M < 4);
      UseInput01 |= (4 <= M);
    if (UseInput00 && !UseInput01) {
    } else if (!UseInput00 && UseInput01) {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
      if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
42415 MemIntr->getBasePtr(),
42416 MemIntr->getMemOperand());
42433 SDValue Ops[] = {
N.getOperand(0),
N.getOperand(2)};
42447 assert(Mask.size() == NumElts &&
"Unexpected shuffle mask size");
42450 MVT MaskVT =
N.getOperand(1).getSimpleValueType();
    for (int &M : Mask)
      M = (M < 0 ? M : M & (Mask.size() - 1));
    for (int &M : Mask)
      M = (M < (int)NumElts ? M : (M - (NumElts / 2)));
    return N.getOperand(0);
  switch (N.getOpcode()) {
    int DMask[] = {0, 1, 2, 3};
    DMask[DOffset + 0] = DOffset + 1;
    DMask[DOffset + 1] = DOffset + 0;
    if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
        V.getOpcode() != N.getOpcode() &&
        V.hasOneUse() && V.getOperand(0).hasOneUse()) {
      for (int i = 0; i < 4; ++i) {
        WordMask[i + NOffset] = Mask[i] + NOffset;
        WordMask[i + VOffset] = VMask[i] + VOffset;
      for (int i = 0; i < 8; ++i)
        MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
      if (ArrayRef<int>(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
          ArrayRef<int>(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
  int ParitySrc[2] = {-1, -1};
  unsigned Size = Mask.size();
  for (unsigned i = 0; i != Size; ++i) {
    if ((M % Size) != i)
    int Src = M / Size;
    if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src)
    ParitySrc[i % 2] = Src;
  if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1])
  Op0Even = ParitySrc[0] == 0;
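  // An ADDSUB/SUBADD candidate mask must keep every element in place
  // (M % Size == i) while the even and odd positions each consistently pick
  // one of the two sources; Op0Even records which source feeds the even lanes.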
42611 EVT VT =
N->getValueType(0);
42633 if (!V1->
hasOneUse() || !V2->hasOneUse())
42641 if ((V2->getOperand(0) !=
LHS || V2->getOperand(1) !=
RHS) &&
42642 (V2->getOperand(0) !=
RHS || V2->getOperand(1) !=
LHS))
42646 LHS = V2->getOperand(0);
RHS = V2->getOperand(1);
42676 MVT VT =
N->getSimpleValueType(0);
42684 SDValue FMAdd = Op0, FMSub = Op1;
42701 bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd;
42720 MVT VT =
N->getSimpleValueType(0);
42726 return DAG.
getNode(Opc,
DL, VT, Opnd0, Opnd1, Opnd2);
42753 if (!Subtarget.
hasAVX2() || !isa<ShuffleVectorSDNode>(
N))
42756 EVT VT =
N->getValueType(0);
42783 auto *SVOp = cast<ShuffleVectorSDNode>(
N);
42784 for (
int Elt : SVOp->getMask())
42785 Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
42809 int HalfIdx1, HalfIdx2;
42812 (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
42822 HalfIdx2,
false, DAG,
true);
42828 if (
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(
N))
42835 EVT VT =
N->getValueType(0);
42844 VT,
SDValue(
N, 0), dl, DAG, Subtarget,
true))
42896 SDValue Mask =
Op.getOperand(MaskIndex);
42897 if (!Mask.hasOneUse())
42901 APInt MaskUndef, MaskZero;
42910 auto *Load = dyn_cast<LoadSDNode>(BC);
42911 if (!Load || !Load->getBasePtr().hasOneUse())
42918 Type *CTy =
C->getType();
42924 unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
42925 if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
42927 unsigned Scale = NumCstElts / NumElts;
42930 bool Simplified =
false;
42932 for (
unsigned i = 0; i != NumCstElts; ++i) {
42933 Constant *Elt =
C->getAggregateElement(i);
42934 if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
42947 SDValue LegalCV = LowerConstantPool(CV, TLO.
DAG);
42959 unsigned Opc =
Op.getOpcode();
42960 EVT VT =
Op.getValueType();
42966 APInt LHSUndef, LHSZero;
42967 APInt RHSUndef, RHSZero;
42977 KnownZero = LHSZero | RHSZero;
42982 APInt LHSUndef, LHSZero;
42983 APInt RHSUndef, RHSZero;
42998 APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
43002 APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero;
43012 LHS.getValueType() ==
RHS.getValueType() &&
43013 LHS.getValueType().getScalarType() == MVT::i8 &&
43014 "Unexpected PSADBW types");
43018 unsigned NumSrcElts =
LHS.getValueType().getVectorNumElements();
43024 if (NewLHS || NewRHS) {
43025 NewLHS = NewLHS ? NewLHS :
LHS;
43026 NewRHS = NewRHS ? NewRHS :
RHS;
43044 unsigned UseOpc = Use->getOpcode();
43045 return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
43046 UseOpc == X86ISD::VSRA) &&
43047 Use->getOperand(0) != Amt;
43050 APInt AmtUndef, AmtZero;
43054 Depth + 1, AssumeSingleUse))
43075 Src, DemandedElts, TLO.
DAG,
Depth + 1))
43085 APInt LHSUndef, LHSZero;
43086 APInt RHSUndef, RHSZero;
43102 KnownZero = LHSZero;
43107 APInt LHSUndef, LHSZero;
43108 APInt RHSUndef, RHSZero;
43121 auto *Amt = cast<ConstantSDNode>(
Op.getOperand(1));
43122 assert(Amt->getAPIntValue().ult(NumElts) &&
"Out of range shift amount");
43123 unsigned ShiftAmt = Amt->getZExtValue();
43133 unsigned C1 = Src.getConstantOperandVal(1);
43135 int Diff = ShiftAmt - C1;
43144 Op, TLO.
DAG.
getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
43148 APInt DemandedSrc = DemandedElts.
lshr(ShiftAmt);
43153 KnownUndef <<= ShiftAmt;
43154 KnownZero <<= ShiftAmt;
43160 auto *Amt = cast<ConstantSDNode>(
Op.getOperand(1));
43161 assert(Amt->getAPIntValue().ult(NumElts) &&
"Out of range shift amount");
43162 unsigned ShiftAmt = Amt->getZExtValue();
43172 unsigned C1 = Src.getConstantOperandVal(1);
43174 int Diff = ShiftAmt - C1;
43183 Op, TLO.
DAG.
getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
43187 APInt DemandedSrc = DemandedElts.
shl(ShiftAmt);
    auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
      APInt OpElts = DemandedElts;
        for (int I = 0; I != NumElts; ++I) {
          if (!DemandedElts[I])
          if (UndefElts[I]) {
          } else if ((Invert && !EltBits[I].isAllOnes()) ||
                     (!Invert && !EltBits[I].isZero())) {
            OpBits |= Invert ? ~EltBits[I] : EltBits[I];
      return std::make_pair(OpBits, OpElts);
    APInt BitsLHS, EltsLHS;
    APInt BitsRHS, EltsRHS;
    std::tie(BitsLHS, EltsLHS) = GetDemandedMasks(RHS);
    std::tie(BitsRHS, EltsRHS) = GetDemandedMasks(LHS, true);
    APInt LHSUndef, LHSZero;
    APInt RHSUndef, RHSZero;
    if (NewLHS || NewRHS) {
      NewLHS = NewLHS ? NewLHS : LHS;
      NewRHS = NewRHS ? NewRHS : RHS;
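    // ANDNP inverts its first operand, so the demanded bits/elements for each
    // side are derived from the constant bits of the opposite operand (with
    // Invert set when the negated side is being examined).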
43263 EVT SrcVT = Src.getValueType();
43264 APInt SrcUndef, SrcZero;
43276 APInt DemandedLHS, DemandedRHS;
43279 APInt LHSUndef, LHSZero;
43283 APInt RHSUndef, RHSZero;
43297 if (NewN0 || NewN1) {
43298 NewN0 = NewN0 ? NewN0 : N0;
43299 NewN1 = NewN1 ? NewN1 : N1;
43313 APInt DemandedLHS, DemandedRHS;
43316 APInt LHSUndef, LHSZero;
43320 APInt RHSUndef, RHSZero;
43329 if (N0 != N1 && !DemandedElts.
isAllOnes()) {
43334 if (NewN0 || NewN1) {
43335 NewN0 = NewN0 ? NewN0 : N0;
43336 NewN1 = NewN1 ? NewN1 : N1;
43347 MVT SrcVT = Src.getSimpleValueType();
43349 APInt SrcUndef, SrcZero;
43362 DemandedElts, TLO.
DAG, Subtarget,
SDLoc(
Op)))
43367 APInt SelUndef, SelZero;
43369 SelZero, TLO,
Depth + 1))
43373 APInt LHSUndef, LHSZero;
43375 LHSZero, TLO,
Depth + 1))
43378 APInt RHSUndef, RHSZero;
43380 RHSZero, TLO,
Depth + 1))
43383 KnownZero = LHSZero & RHSZero;
43384 KnownUndef = LHSUndef & RHSUndef;
43390 APInt DemandedUpperElts = DemandedElts;
43400 if (DemandedElts == 1 &&
Op.getValue(1).use_empty() &&
isTypeLegal(SVT)) {
43402 auto *Mem = cast<MemSDNode>(
Op);
43404 Mem->getMemOperand());
43412 MVT SrcVT = Src.getSimpleValueType();
43414 if (DemandedElts == 1) {
43417 else if (Src.getValueType() != VT)
43424 APInt SrcUndef, SrcZero;
43432 Src, SrcElts, TLO.
DAG,
Depth + 1))
43460 DemandedElts.
lshr(NumElts / 2) == 0) {
43462 unsigned ExtSizeInBits = SizeInBits / 2;
43466 ExtSizeInBits = SizeInBits / 4;
43473 if (Src.getValueSizeInBits() > ExtSizeInBits)
43479 TLO.
DAG,
DL, ExtSizeInBits));
43483 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
43487 SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
43490 MemIntr->getMemOperand());
43494 TLO.
DAG,
DL, ExtSizeInBits));
43498 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
43499 EVT MemVT = MemIntr->getMemoryVT();
43504 MemIntr->getBasePtr(), MemIntr->getMemOperand());
43508 TLO.
DAG,
DL, ExtSizeInBits));
43517 TLO.
DAG,
DL, ExtSizeInBits));
43545 if (VT == MVT::v4f64 || VT == MVT::v4i64) {
43561 unsigned LoMask =
Op.getConstantOperandVal(2) & 0xF;
43565 unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2);
43566 unsigned SrcIdx = (LoMask & 0x2) >> 1;
43580 unsigned Scale = SizeInBits / ExtSizeInBits;
43582 MVT SrcVT =
SrcOp.getSimpleValueType();
43583 unsigned SrcExtSize =
43638 "Unsupported vector size");
  APInt OpUndef, OpZero;
  if (OpMask.size() != (unsigned)NumElts ||
        return VT.getSizeInBits() != V.getValueSizeInBits() ||
               !V.getValueType().isVector();
  KnownZero = OpZero;
  KnownUndef = OpUndef;
  int NumSrcs = OpInputs.size();
  for (int i = 0; i != NumElts; ++i)
    if (!DemandedElts[i])
  for (int Src = 0; Src != NumSrcs; ++Src)
  for (int Src = 0; Src != NumSrcs; ++Src) {
    int Lo = Src * NumElts;
    for (int i = 0; i != NumElts; ++i)
      if (DemandedElts[i]) {
        int M = OpMask[i] - Lo;
        if (0 <= M && M < NumElts)
    APInt SrcUndef, SrcZero;
    for (int i = 0; i != NumElts; ++i)
      if (DemandedElts[i])
        DemandedMask[i] = i;
            true, true, TLO.DAG,
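  // Each shuffle source is simplified against only the elements that the
  // demanded lanes of the mask actually reference; sources whose demanded set
  // ends up empty can be treated as undef.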
43751 unsigned Depth)
const {
43752 EVT VT =
Op.getValueType();
43754 unsigned Opc =
Op.getOpcode();
43759 MVT SrcVT = Src.getSimpleValueType();
43781 bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.
hasAVX512();
43783 DemandedMaskLHS = DemandedMask;
43785 DemandedMaskRHS = DemandedMask;
43788 KnownLHS, TLO,
Depth + 1))
43791 KnownRHS, TLO,
Depth + 1))
43795 KnownRHS = KnownRHS.
trunc(32);
43805 LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43807 RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43808 if (DemandedLHS || DemandedRHS) {
43809 DemandedLHS = DemandedLHS ? DemandedLHS :
LHS;
43810 DemandedRHS = DemandedRHS ? DemandedRHS :
RHS;
43822 Known, TLO,
Depth + 1))
43826 OriginalDemandedElts, Known2, TLO,
Depth + 1))
43831 OriginalDemandedElts, TLO))
43847 APInt DemandedMask = OriginalDemandedBits.
lshr(ShAmt);
43856 int Diff = ShAmt - Shift2Amt;
43869 unsigned NumSignBits =
43872 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
43879 Known.
Zero <<= ShAmt;
43880 Known.
One <<= ShAmt;
43888 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
43904 APInt DemandedMask = OriginalDemandedBits << ShAmt;
43919 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
43935 APInt DemandedMask = OriginalDemandedBits << ShAmt;
43944 unsigned NumSignBits =
43946 if (ShAmt < NumSignBits)
43976 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
43991 Sel, SignMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43993 LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43995 RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43997 if (NewSel || NewLHS || NewRHS) {
43998 NewSel = NewSel ? NewSel : Sel;
43999 NewLHS = NewLHS ? NewLHS :
LHS;
44000 NewRHS = NewRHS ? NewRHS :
RHS;
44002 NewSel, NewLHS, NewRHS));
44009 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(1));
44013 if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
44014 unsigned Idx = CIdx->getZExtValue();
44019 APInt DemandedVecBits = OriginalDemandedBits.
trunc(VecBitWidth);
44020 if (DemandedVecBits == 0)
44023 APInt KnownUndef, KnownZero;
44026 KnownZero, TLO,
Depth + 1))
44031 KnownVec, TLO,
Depth + 1))
44035 Vec, DemandedVecBits, DemandedVecElts, TLO.
DAG,
Depth + 1))
44048 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
44052 unsigned Idx = CIdx->getZExtValue();
44053 if (!OriginalDemandedElts[
Idx])
44057 APInt DemandedVecElts(OriginalDemandedElts);
44060 KnownVec, TLO,
Depth + 1))
44065 APInt DemandedSclBits = OriginalDemandedBits.
zext(NumSclBits);
44080 APInt DemandedLHS, DemandedRHS;
44086 KnownLHS, TLO,
Depth + 1))
44089 KnownRHS, TLO,
Depth + 1))
44094 Op.getOperand(0), SignMask, DemandedLHS, TLO.
DAG,
Depth + 1);
44096 Op.getOperand(1), SignMask, DemandedRHS, TLO.
DAG,
Depth + 1);
44097 if (DemandedOp0 || DemandedOp1) {
44098 SDValue Op0 = DemandedOp0 ? DemandedOp0 :
Op.getOperand(0);
44099 SDValue Op1 = DemandedOp1 ? DemandedOp1 :
Op.getOperand(1);
44107 MVT SrcVT = Src.getSimpleValueType();
44118 Src->hasOneUse()) {
44138 MVT SrcVT = Src.getSimpleValueType();
44143 if (OriginalDemandedBits.
countr_zero() >= NumElts)
44154 APInt KnownUndef, KnownZero;
44170 if (KnownSrc.
One[SrcBits - 1])
44172 else if (KnownSrc.
Zero[SrcBits - 1])
44177 Src, DemandedSrcBits, DemandedElts, TLO.
DAG,
Depth + 1))
44187 "Illegal vector type for X86ISD::TESTP");
44192 bool AssumeSingleUse = (Op0 == Op1) &&
Op->isOnlyUserOf(Op0.
getNode());
44194 AssumeSingleUse) ||
44201 OriginalDemandedElts, Known2, TLO,
Depth + 1))
44204 OriginalDemandedElts, Known, TLO,
Depth + 1))
44217 if (
auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
44219 uint64_t Val1 = Cst1->getZExtValue();
44220 uint64_t MaskedVal1 = Val1 & 0xFFFF;
44228 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
44229 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
44255 if (LengthBits.
isZero())
44265 unsigned DemandedBitsLZ = OriginalDemandedBits.
countl_zero();
44292 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
44299 unsigned Opc =
Op.getOpcode();
44300 EVT VT =
Op.getValueType();
44307 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
44310 !DemandedElts[CIdx->getZExtValue()])
44318 unsigned ShAmt =
Op.getConstantOperandVal(1);
44322 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
44330 return Op.getOperand(0);
44337 return Op.getOperand(1);
44369 APInt ShuffleUndef, ShuffleZero;
44373 ShuffleUndef, ShuffleZero, DAG,
Depth,
false)) {
44377 if (ShuffleMask.
size() == (
unsigned)NumElts &&
44379 return VT.getSizeInBits() == V.getValueSizeInBits();
44384 if (DemandedElts.
isSubsetOf(ShuffleUndef | ShuffleZero))
44389 for (
int i = 0; i != NumElts; ++i) {
44390 int M = ShuffleMask[i];
44391 if (!DemandedElts[i] || ShuffleUndef[i])
44393 int OpIdx = M / NumElts;
44394 int EltIdx = M % NumElts;
44395 if (M < 0 || EltIdx != i) {
44400 if (IdentityOp == 0)
44404 "Multiple identity shuffles detected");
44406 if (IdentityOp != 0)
44420 switch (
Op.getOpcode()) {
44434 assert(0 <= M.value() && M.value() < (
int)(Ops.
size() * NumElts) &&
44435 "Shuffle mask index out of range");
44436 DemandedSrcElts[M.value() / NumElts].setBit(M.value() % NumElts);
44439 if (!DemandedSrcElts[
Op.index()].isZero() &&
44456 switch (
Op.getOpcode()) {
44476 switch (
Op->getConstantOperandVal(0)) {
44477 case Intrinsic::x86_sse2_pmadd_wd:
44478 case Intrinsic::x86_avx2_pmadd_wd:
44479 case Intrinsic::x86_avx512_pmaddw_d_512:
44480 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
44481 case Intrinsic::x86_avx2_pmadd_ub_sw:
44482 case Intrinsic::x86_avx512_pmaddubs_w_512:
44491 const APInt &DemandedElts,
44494 unsigned Depth)
const {
44496 unsigned Opc =
Op.getOpcode();
44512 bool AllowTruncate) {
44513 switch (Src.getOpcode()) {
44515 if (!AllowTruncate)
44519 return Src.getOperand(0).getValueSizeInBits() ==
Size;
44529 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
44555 EVT SrcVT = Src.getValueType();
44556 if (SrcVT != MVT::v4i1)
44559 switch (Src.getOpcode()) {
44561 if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
44563 cast<CondCodeSDNode>(Src.getOperand(2))->get() ==
ISD::SETLT) {
44564 SDValue Op0 = Src.getOperand(0);
44589 switch (Src.getOpcode()) {
44599 Src.getOpcode(),
DL, SExtVT,
44605 DL, SExtVT, Src.getOperand(0),
44621 EVT SrcVT = Src.getValueType();
44639 bool PreferMovMsk = Src.getOpcode() ==
ISD::TRUNCATE && Src.hasOneUse() &&
44640 (Src.getOperand(0).getValueType() == MVT::v16i8 ||
44641 Src.getOperand(0).getValueType() == MVT::v32i8 ||
44642 Src.getOperand(0).getValueType() == MVT::v64i8);
44646 if (Src.getOpcode() ==
ISD::SETCC && Src.hasOneUse() &&
44647 cast<CondCodeSDNode>(Src.getOperand(2))->get() ==
ISD::SETLT &&
44649 EVT CmpVT = Src.getOperand(0).getValueType();
44652 (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
44653 PreferMovMsk =
true;
44665 SubSrcOps.
size() >= 2) {
44666 SDValue LowerOp = SubSrcOps[0];
44690 bool PropagateSExt =
false;
44695 SExtVT = MVT::v2i64;
44698 SExtVT = MVT::v4i32;
44701 if (Subtarget.
hasAVX() &&
44703 SExtVT = MVT::v4i64;
44704 PropagateSExt =
true;
44708 SExtVT = MVT::v8i16;
44716 SExtVT = MVT::v8i32;
44717 PropagateSExt =
true;
44721 SExtVT = MVT::v16i8;
44728 SExtVT = MVT::v32i8;
44734 if (Subtarget.hasBWI())
44736 SExtVT = MVT::v64i8;
44741 SExtVT = MVT::v64i8;
44750 if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
44753 if (SExtVT == MVT::v8i16) {
44768 EVT SrcVT =
Op.getValueType();
44770 "Expected a vXi1 vector");
44772 "Expected a constant build vector");
44777 if (!In.isUndef() && (In->getAsZExtVal() & 0x1))
44796 EVT DstVT =
N->getValueType(0);
44798 EVT SrcVT =
Op.getValueType();
44800 if (!
Op.hasOneUse())
44820 LHS.getOperand(0).getValueType() == DstVT)
44825 RHS.getOperand(0).getValueType() == DstVT)
44847 auto CreateMMXElement = [&](
SDValue V) {
44850 if (V.getValueType().isFloatingPoint()) {
44851 if (Subtarget.
hasSSE1() && !isa<ConstantFPSDNode>(V)) {
44870 if (
Splat.isUndef())
    unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
  for (unsigned i = 0; i != NumElts; ++i)
  while (Ops.size() > 1) {
    unsigned NumOps = Ops.size();
    unsigned IntrinOp =
        (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
                     : (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
                                    : Intrinsic::x86_mmx_punpcklbw));
    for (unsigned i = 0; i != NumOps; i += 2)
                        Ops[i], Ops[i + 1]);
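  // Build the MMX value by creating one element per scalar and then pairwise
  // interleaving with punpcklbw/punpcklwd/punpckldq until a single 64-bit
  // value remains.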
44923 unsigned Depth = 0) {
44928 unsigned Opc = V.getOpcode();
44932 SDValue Src = V.getOperand(0);
44933 EVT SrcVT = Src.getValueType();
44939 auto *
C = cast<ConstantSDNode>(V);
44942 if (
C->isAllOnes())
44948 SDValue Src = V.getOperand(0);
44953 Subtarget,
Depth + 1))
44961 SDValue Src = V.getOperand(0);
44963 Src.getScalarValueSizeInBits());
44966 Subtarget,
Depth + 1))
44978 Subtarget,
Depth + 1))
44980 Subtarget,
Depth + 1))
44981 return DAG.
getNode(Opc,
DL, VT, N0, N1);
44986 SDValue Src0 = V.getOperand(0);
44987 if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
44988 ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
44991 if (
auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
45013 EVT VT =
N->getValueType(0);
45030 if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.
isScalarInteger() &&
45040 if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.
isScalarInteger() &&
45085 if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.
hasAVX512() &&
45097 auto *BCast = cast<MemIntrinsicSDNode>(N0);
45099 unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
45101 if (MemSize >= 32) {
45109 SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
45112 MemVT, BCast->getMemOperand());
45121 if (VT == MVT::x86mmx) {
45141 (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) &&
45143 bool LowUndef =
true, AllUndefOrZero =
true;
45146 LowUndef &=
Op.isUndef() || (i >= e/2);
45149 if (AllUndefOrZero) {
45162 (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
45163 SrcVT == MVT::v8i8))
45196 if (
auto *
C = dyn_cast<ConstantSDNode>(N0)) {
45197 if (
C->isAllOnes())
45218 SDValue MovmskIn = Src.getOperand(0);
45224 if (MovMskElts <= NumElts &&
45232 if (
EVT(CmpVT) == VT)
45237 unsigned NumConcats = NumElts / MovMskElts;
45266 if (!((Subtarget.
hasSSE1() && VT == MVT::f32) ||
45267 (Subtarget.
hasSSE2() && VT == MVT::f64) ||
45268 (Subtarget.hasFP16() && VT == MVT::f16) ||
45281 !isa<ConstantSDNode>(LogicOp0.
getOperand(0))) {
45290 !isa<ConstantSDNode>(LogicOp1.
getOperand(0))) {
45302 Op0 =
Mul.getOperand(0);
45303 Op1 =
Mul.getOperand(1);
45312 Op.getOperand(0).getScalarValueSizeInBits() <= 8)
45315 auto *BV = dyn_cast<BuildVectorSDNode>(
Op);
45352 unsigned &LogBias,
const SDLoc &
DL,
45368 if (Subtarget.hasVNNI() && !Subtarget.hasVLX())
45434 if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
45444 EVT SrcVT = Src.getValueType();
45446 if (SrcSVT != ExtractVT || (SrcVT.
getSizeInBits() % 128) != 0)
45456 SrcVT =
Lo.getValueType();
45459 assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
45460 (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
45461 "Unexpected value type");
45481 if (ExtractVT == MVT::i8) {
45484 {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
45489 MinPos = DAG.
getBitcast(MVT::v8i16, MinPos);
45509 if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
45510 ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
45516 if (!
Match && ExtractVT == MVT::i1)
45528 EVT MatchVT =
Match.getValueType();
45530 unsigned MaxElts = Subtarget.
hasInt256() ? 32 : 16;
45534 if (ExtractVT == MVT::i1) {
45560 while (NumElts > MaxElts) {
45571 Movmsk = DAG.
getZExtOrTrunc(Movmsk,
DL, NumElts > 32 ? MVT::i64 : MVT::i32);
45574 unsigned MatchSizeInBits =
Match.getValueSizeInBits();
45575 if (!(MatchSizeInBits == 128 ||
45576 (MatchSizeInBits == 256 && Subtarget.
hasAVX())))
45583 if (
Match.getValueType().getVectorNumElements() < 2)
45594 MatchSizeInBits =
Match.getValueSizeInBits();
45609 assert((NumElts <= 32 || NumElts == 64) &&
45610 "Not expecting more than 64 elements");
45612 MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
45642 if (!Subtarget.hasVNNI() && !Subtarget.hasAVXVNNI())
45648 if (ExtractVT != MVT::i32)
45680 unsigned StageBias;
  if (Stages > StageBias) {
    for (unsigned i = Stages - StageBias; i > 0; --i) {
      for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
        Mask[j] = MaskEnd + j;
  if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
  for (unsigned i = Stages - 3; i > 0; --i) {
    for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
      Mask[j] = MaskEnd + j;
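  // Each reduction stage shuffles the upper half of the remaining elements
  // onto the lower half and re-applies the binop, halving the live width on
  // every iteration.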
45798 "Only EXTRACT_VECTOR_ELT supported so far");
45801 EVT VT =
N->getValueType(0);
45804 return Use->getOpcode() == ISD::STORE ||
45805 Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
45806 Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
45809 auto *LoadVec = dyn_cast<LoadSDNode>(SrcVec);
45820 DAG.
getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
45821 LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
45841 EVT VT =
N->getValueType(0);
45842 EVT SrcVT = Src.getValueType();
45848 if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(
Idx))
45851 const APInt &IdxC =
N->getConstantOperandAPInt(1);
45852 if (IdxC.
uge(NumSrcElts))
45860 EVT SrcOpVT =
SrcOp.getValueType();
45864 unsigned Offset = IdxC.
urem(Scale) * SrcEltBits;
45877 auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
45879 if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
45880 VT.
getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
45882 MemIntr->getBasePtr(),
45883 MemIntr->getPointerInfo(),
45884 MemIntr->getOriginalAlign(),
45885 MemIntr->getMemOperand()->getFlags());
45899 if (IdxC.
ult(Scale)) {
45927 auto GetLegalExtract = [&Subtarget, &DAG, &dl](
SDValue Vec,
EVT VecVT,
45930 if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
45931 (VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
45932 VecSVT == MVT::i64)) {
45934 unsigned NumEltsPerLane = 128 / EltSizeInBits;
45935 unsigned LaneOffset = (
Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
45936 unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
45939 Idx &= (NumEltsPerLane - 1);
45941 if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
45947 if ((VecVT == MVT::v8i16 && Subtarget.
hasSSE2()) ||
45948 (VecVT == MVT::v16i8 && Subtarget.
hasSSE41())) {
  if (Mask.size() != NumSrcElts) {
    if ((NumSrcElts % Mask.size()) == 0) {
      int Scale = NumSrcElts / Mask.size();
      Mask = std::move(ScaledMask);
    } else if ((Mask.size() % NumSrcElts) == 0) {
      int Scale = Mask.size() / NumSrcElts;
      int Lo = Scale * ExtractIdx;
      int Hi = Scale * (ExtractIdx + 1);
      for (int i = 0, e = (int)Mask.size(); i != e; ++i)
        if (i < Lo || Hi <= i)
      while (Mask.size() > NumSrcElts &&
        Mask = std::move(WidenedMask);
  if (Mask.size() == NumSrcElts) {
    unsigned Scale = Mask.size() / NumSrcElts;
    ExtractIdx = Mask[ScaledIdx];
           "Failed to widen vector type");
  ExtractIdx = ExtractIdx % Mask.size();
  if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
46053 if (OpVT != MVT::f32 && OpVT != MVT::f64)
46065 if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
46078 "Unexpected cond type for combine");
46155 "Reduction doesn't end in an extract from index 0");
46167 auto WidenToV16I8 = [&](
SDValue V,
bool ZeroExtend) {
46168 if (V.getValueType() == MVT::v4i8) {
46169 if (ZeroExtend && Subtarget.
hasSSE41()) {
46186 if (VT != MVT::i8 || NumElts < 4 || !
isPowerOf2_32(NumElts))
46200 Rdx = WidenToV16I8(Rdx,
false);
46205 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46207 {4, 5, 6, 7, -1, -1, -1, -1}));
46208 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46210 {2, 3, -1, -1, -1, -1, -1, -1}));
46211 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46213 {1, -1, -1, -1, -1, -1, -1, -1}));
46219 if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
46220 Rdx = WidenToV16I8(Rdx,
true);
46232 if (VT == MVT::i8) {
46236 VecVT =
Lo.getValueType();
46239 assert(VecVT == MVT::v16i8 &&
"v16i8 reduction expected");
46242 MVT::v16i8,
DL, Rdx, Rdx,
46243 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
46255 if (Opc ==
ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
46266 Rdx = WidenToV16I8(Rdx,
true);
46283 VecVT =
Lo.getValueType();
46308 if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.
hasSSSE3()) ||
46309 ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.
hasSSE3())) {
46316 if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.
hasSSSE3()) &&
46317 !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.
hasSSE3()))
46322 for (
unsigned i = 0; i != ReductionSteps; ++i)
46323 Rdx = DAG.
getNode(HorizOpcode,
DL, VecVT, Rdx, Rdx);
46338 SDValue InputVector =
N->getOperand(0);
46339 SDValue EltIdx =
N->getOperand(1);
46340 auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);
46343 EVT VT =
N->getValueType(0);
46344 SDLoc dl(InputVector);
46350 if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
46355 APInt UndefVecElts;
46362 if (UndefVecElts[
Idx])
46371 if (Src.getValueType().getScalarType() == MVT::i1 &&
46392 "Vector type mismatch");
46405 if (VT == MVT::i64 && SrcVT == MVT::v1i64 &&
46412 if (VT == MVT::i32 && SrcVT == MVT::v2i32 &&
46445 N, InputVector.
getValueType(), InputVector, CIdx->getZExtValue(),
46460 bool IsVar = !CIdx;
46462 unsigned ResNo = InputVector.
getResNo();
46463 auto IsBoolExtract = [&BoolExtracts, &ResNo, &IsVar](
SDNode *
Use) {
46465 Use->getOperand(0).getResNo() == ResNo &&
46466 Use->getValueType(0) == MVT::i1) {
46468 IsVar |= !isa<ConstantSDNode>(
Use->getOperand(1));
46474 if (
all_of(InputVector->
users(), IsBoolExtract) &&
46475 (IsVar || BoolExtracts.
size() > 1)) {
46529 if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
46545 if (NumElts > EltSizeInBits) {
46550 assert((NumElts % EltSizeInBits) == 0 &&
"Unexpected integer scale");
    unsigned Scale = NumElts / EltSizeInBits;
    bool UseBroadcast = Subtarget.hasInt256() &&
    for (unsigned i = 0; i != Scale; ++i) {
      int Offset = UseBroadcast ? (i * EltSizeInBits) : 0;
  } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
             (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
    assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
                                 (NumElts * EltSizeInBits) / NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      int BitIdx = (i % EltSizeInBits);
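      // Each vector lane receives a copy of the scalar (broadcast or repeated
      // insertion) and then isolates its own bit with a per-lane mask before
      // the compare, turning the iN bit pattern into a boolean vector.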
46615 EVT VT =
LHS.getValueType();
46616 EVT CondVT =
Cond.getValueType();
46622 assert(CondVT.
isVector() &&
"Vector select expects a vector selector!");
46632 if (TValIsAllZeros && FValIsAllZeros) {
46649 if (!TValIsAllOnes && !FValIsAllZeros &&
Cond.hasOneUse() &&
46657 if (TValIsAllZeros || FValIsAllOnes) {
46660 cast<CondCodeSDNode>(
CC)->
get(),
Cond.getOperand(0).getValueType());
46664 TValIsAllOnes = FValIsAllOnes;
46665 FValIsAllZeros = TValIsAllZeros;
46674 if (TValIsAllOnes && FValIsAllZeros)
46681 if (TValIsAllOnes) {
46688 if (FValIsAllZeros) {
46695 if (TValIsAllZeros) {
46716 unsigned Opcode =
N->getOpcode();
46721 EVT VT =
N->getValueType(0);
46748 auto *TrueC = dyn_cast<ConstantSDNode>(
LHS);
46749 auto *FalseC = dyn_cast<ConstantSDNode>(
RHS);
46750 if (!TrueC || !FalseC)
46754 EVT VT =
N->getValueType(0);
46761 if (
Cond.getValueType() != MVT::i1)
46768 const APInt &TrueVal = TrueC->getAPIntValue();
46769 const APInt &FalseVal = FalseC->getAPIntValue();
46772 if ((TrueVal.isAllOnes() || FalseVal.isAllOnes()) &&
46780 APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
46786 ((VT == MVT::i32 || VT == MVT::i64) &&
46787 (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {
46792 if (TrueVal.slt(FalseVal)) {
46801 if (!AbsDiff.
isOne())
46805 if (!FalseC->isZero())
46831 EVT VT =
N->getValueType(0);
46854 if (VT == MVT::v32i8 && !Subtarget.
hasAVX2())
46862 if (BitWidth < 8 || BitWidth > 64)
46877 if (OnlyUsedAsSelectCond(
Cond)) {
46894 Cond, U->getOperand(1), U->getOperand(2));
46905 N->getOperand(1),
N->getOperand(2));
46929 EVT MaskVT = Mask.getValueType();
46932 "Mask must be zero/all-bits");
46934 if (
X.getValueType() != MaskVT ||
Y.getValueType() != MaskVT)
46940 return N->getOpcode() ==
ISD::SUB &&
N->getOperand(1) == V &&
46945 if (IsNegV(
Y.getNode(),
X))
46947 else if (IsNegV(
X.getNode(),
Y))
46996 Cond.getOperand(0).getValueType());
46998 Cond.getOperand(1), NewCC);
47023 EVT VT =
LHS.getValueType();
47024 EVT CondVT =
Cond.getValueType();
47033 (!CondConstantVector || CondVT.
getScalarType() == MVT::i8) &&
47036 DL, DAG, Subtarget))
    for (int i = 0; i != NumElts; ++i) {
      if (CondMask[i] < NumElts) {
        LHSMask[i] = isUndefOrZero(LHSMask[i]) ? 0x80 : LHSMask[i];
        RHSMask[i] = isUndefOrZero(RHSMask[i]) ? 0x80 : RHSMask[i];
47091 bool IsStrict =
Cond->isStrictFPOpcode();
47093 cast<CondCodeSDNode>(
Cond.getOperand(IsStrict ? 3 : 2))->get();
47097 unsigned Opcode = 0;
47236 DL, {
N->getValueType(0), MVT::Other},
47251 Cond.getOpcode() ==
ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) {
47290 Op.hasOneUse() &&
Op.getOperand(0).hasOneUse() &&
47295 bool SelectableLHS = SelectableOp(
LHS,
RHS);
47296 bool SelectableRHS = SelectableOp(
RHS,
LHS);
47297 if (SelectableLHS || SelectableRHS) {
47298 EVT SrcVT = SelectableLHS ?
LHS.getOperand(0).getValueType()
47299 :
RHS.getOperand(0).getValueType();
47317 Cond.hasOneUse()) {
47318 EVT CondVT =
Cond.getValueType();
47342 if (
LHS == Cond0 &&
RHS == Cond1) {
47367 cast<CondCodeSDNode>(InnerSetCC.
getOperand(2))->get();
47435 DL, VT,
LHS.getOperand(0),
LHS.getOperand(1));
47447 DL, VT,
RHS.getOperand(0),
RHS.getOperand(1));
47475 Cond.getScalarValueSizeInBits(),
47477 Cond.hasOneUse()) {
47479 Cond.getOperand(0).getOperand(1));
47487 Cond.hasOneUse()) {
47508 LHS.getOperand(0).getValueType() == IntVT)) &&
47510 RHS.getOperand(0).getValueType() == IntVT))) {
47514 LHS =
LHS.getOperand(0);
47519 RHS =
RHS.getOperand(0);
47535 cast<CondCodeSDNode>(
Cond.getOperand(2))->get() ==
ISD::SETEQ &&
47536 Cond.getOperand(0).getValueType() == VT) {
47540 if (
C &&
C->getAPIntValue().isPowerOf2()) {
47551 bool CanShiftBlend =
47553 (Subtarget.
hasAVX2() && EltBitWidth == 64) ||
47554 (Subtarget.hasXOP()));
47555 if (CanShiftBlend &&
47557 return C->getAPIntValue().isPowerOf2();
47563 auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i));
47565 MaskVal->getAPIntValue().exactLogBase2());
47590 (Cmp.getOpcode() ==
X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
47596 if (!Cmp.hasOneUse())
47611 SDValue CmpLHS = Cmp.getOperand(0);
47612 SDValue CmpRHS = Cmp.getOperand(1);
47623 auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
47627 APInt Addend = OpRHSC->getAPIntValue();
47631 auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
47635 APInt Comparison = CmpRHSC->getAPIntValue();
47636 APInt NegAddend = -Addend;
  if (Comparison != NegAddend) {
    APInt IncComparison = Comparison + 1;
    if (IncComparison == NegAddend) {
      Comparison = IncComparison;
    } else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) {
      Comparison = IncComparison;
    APInt DecComparison = Comparison - 1;
    if (DecComparison == NegAddend) {
      Comparison = DecComparison;
    } else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) {
      Comparison = DecComparison;
  if (Comparison == NegAddend) {
    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
                                 AN->getMemOperand());
  if (!Comparison.isZero())
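  // Nudging the comparison constant by one (together with the matching
  // condition-code change, e.g. '<=' vs '<') lets the compare line up with the
  // negated addend so the flags of the locked add/sub can be reused.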
47709 if (!Cmp.hasOneUse())
47717 Src = Cmp.getOperand(0);
47721 if (Src.getOpcode() !=
ISD::SRA || !Src.hasOneUse())
47723 Src = Src.getOperand(0);
47728 Src = Cmp.getOperand(1);
47730 Src = Cmp.getOperand(0);
47739 MVT SrcVT = Src.getSimpleValueType();
47744 if (Src.getOpcode() ==
ISD::SHL) {
47746 Src = Src.getOperand(0);
47776 (Cmp.getOpcode() ==
X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
47785 SDValue Op1 = Cmp.getOperand(0);
47786 SDValue Op2 = Cmp.getOperand(1);
  bool checkAgainstTrue = false;
  if ((C = dyn_cast<ConstantSDNode>(Op1)))
  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
  if (C->getZExtValue() == 1) {
    needOppositeCond = !needOppositeCond;
    checkAgainstTrue = true;
  } else if (C->getZExtValue() != 0)
  bool truncatedToBoolWithAnd = false;
    truncatedToBoolWithAnd = true;
47832 if (checkAgainstTrue && !truncatedToBoolWithAnd)
47835 "Invalid use of SETCC_CARRY!");
47840 if (needOppositeCond)
47856 Op =
Op.getOperand(0);
47864 bool FValIsFalse =
true;
47869 needOppositeCond = !needOppositeCond;
47870 FValIsFalse =
false;
47878 if (needOppositeCond)
47904 switch (
Cond->getOpcode()) {
47905 default:
return false;
47912 SetCC0 =
Cond->getOperand(0);
47913 SetCC1 =
Cond->getOperand(1);
47935 bool FoundAndLSB =
false;
47961 !isa<ConstantSDNode>(CarryOp1.
getOperand(1))) {
47974 }
else if (FoundAndLSB) {
47981 return getBT(Carry, BitNo,
DL, DAG);
48106 assert(VT == MVT::i32 &&
"Expected i32 EFLAGS comparison result");
48113 if ((EltBits == 32 || EltBits == 64) && Subtarget.
hasAVX()) {
48119 }
else if (EltBits == 16) {
48153 if (Src0 && Src1) {
48176 unsigned CmpOpcode = EFLAGS.
getOpcode();
48179 auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.
getOperand(1));
48182 const APInt &CmpVal = CmpConstant->getAPIntValue();
48199 "Unexpected MOVMSK operand");
48205 NumElts <= CmpBits && CmpVal.
isMask(NumElts);
48206 if (!IsAnyOf && !IsAllOf)
48228 if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
48229 BCNumEltBits > NumEltBits &&
48248 EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
48264 if (IsAllOf && Subtarget.
hasSSE41() && IsOneUse) {
48282 LHS.getOperand(0),
LHS.getOperand(1));
48284 RHS.getOperand(0),
RHS.getOperand(1));
48304 if (IsAnyOf && CmpBits == 8 && VecOp1.
isUndef()) {
48318 if (CmpBits >= 16 && Subtarget.
hasInt256() &&
48319 (IsAnyOf || (SignExt0 && SignExt1))) {
48324 Result.getValueType().getVectorNumElements() <= NumElts) {
48326 Result.getOperand(0), Result.getOperand(1));
48330 Result = DAG.
getBitcast(MVT::v32i8, Result);
48332 unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
48333 if (!SignExt0 || !SignExt1) {
48335 "Only perform v16i16 signmasks for any_of patterns");
48362 if (NumElts <= CmpBits &&
48364 ShuffleMask, DAG) &&
48366 ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits() &&
48381 if (NumElts <= CmpBits && Subtarget.hasAVX() &&
48382 !Subtarget.preferMovmskOverVTest() && IsOneUse &&
48383 (NumEltBits == 32 || NumEltBits == 64)) {
48430 SDValue FalseOp = N->getOperand(0);
48431 SDValue TrueOp = N->getOperand(1);
48436 if (TrueOp == FalseOp)
48456 if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
48459 if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
48468 if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
48474 unsigned ShAmt = TrueC->getAPIntValue().logBase2();
48482 if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
48487 FalseC->getValueType(0), Cond);
48495 if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
48496 APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
48498 "Implicit constant truncation");
48500 bool isFastMultiplier = false;
48501 if (Diff.ult(10)) {
48511   isFastMultiplier = true;
48516 if (isFastMultiplier) {
48527 if (FalseC->getAPIntValue() != 0)
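// Illustrative note (assumption): when both arms of the CMOV are constants,
// the result can be rebuilt as FalseC + Cond * (TrueC - FalseC); a Diff below
// 10 is treated as a "fast multiplier" because scales like 3/5/9 map onto a
// single LEA, and the FalseC != 0 check above merely adds the base constant
// back afterwards.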
48556 (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
48557 !isa<ConstantSDNode>(Cond.getOperand(0))) {
48560 CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
48566 CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
48586 auto *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
48587 if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {
48588 EVT CondVT = Cond->getValueType(0);
48589 EVT OuterVT = N->getValueType(0);
48628 SDValue LOps[] = {FalseOp, TrueOp,
48652 if (Const == Cond.getOperand(0))
48653   Const = Cond.getOperand(1);
48656 if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
48657     Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
48660     Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
48661 EVT VT = N->getValueType(0);
48678 EVT VT = N->getOperand(0).getValueType();
48682 assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
48683 unsigned SignBits[2] = {1, 1};
48684 bool IsPositive[2] = {false, false};
48685 for (unsigned i = 0; i < 2; i++) {
48692 bool AllPositive = IsPositive[0] && IsPositive[1];
48693 unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
48695 if (MinSignBits >= 25)
48696 Mode = ShrinkMode::MULS8;
48698 else if (AllPositive && MinSignBits >= 24)
48699 Mode = ShrinkMode::MULU8;
48701 else if (MinSignBits >= 17)
48702 Mode = ShrinkMode::MULS16;
48704 else if (AllPositive && MinSignBits >= 16)
48705 Mode = ShrinkMode::MULU16;
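// Threshold rationale (assumption based on the sign-bit counts above): with a
// 32-bit element, >= 25 known sign bits means the value fits in i8 as a signed
// quantity (MULS8), >= 24 plus known-positive fits unsigned i8 (MULU8), and the
// 17/16 thresholds are the analogous i16 cases (MULS16/MULU16), so the multiply
// can be performed in a narrower type using instructions such as PMULLW.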
48751 if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
48760 EVT VT = N->getOperand(0).getValueType();
48762 if ((NumElts % 2) != 0)
48774 if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
48784 ReducedVT, NewN0, NewN1);
48790 for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
48791 ShuffleMask[2 * i] = i;
48792 ShuffleMask[2 * i + 1] = i + NumElts;
48798 for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
48799 ShuffleMask[2 * i] = i + NumElts / 2;
48800 ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
48811 auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
48821 auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) {
48836 return combineMulShlAddOrSub(5, 1, true);
48839 return combineMulShlAddOrSub(5, 2, true);
48842 return combineMulShlAddOrSub(5, 3, true);
48846 combineMulShlAddOrSub(5, 2, true));
48849 return combineMulShlAddOrSub(9, 1, true);
48852 return combineMulShlAddOrSub(9, 2, true);
48855 return combineMulShlAddOrSub(9, 3, true);
48858 return combineMulShlAddOrSub(3, 2, true);
48861 return combineMulShlAddOrSub(3, 3, false);
48864 return combineMulMulAddOrSub(5, 5, true);
48867 return combineMulMulAddOrSub(9, 3, true);
48871 combineMulMulAddOrSub(9, 3, true));
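// Illustrative sketch only (not part of this file). Assuming the lambdas above
// compute ((X * Mult) << Shift) +/- X and (X * Mul1) * Mul2 +/- X, the constant
// multiplier that each call targets can be recovered arithmetically; the helper
// names below are hypothetical:
static long long mulShlAddOrSubAmount(int Mult, int Shift, bool IsAdd) {
  long long Scaled = (long long)Mult << Shift; // LEA-style scale, then shift
  return IsAdd ? Scaled + 1 : Scaled - 1;      // e.g. (5, 1, true) -> 11
}
static long long mulMulAddOrSubAmount(int Mul1, int Mul2, bool IsAdd) {
  long long Scaled = (long long)Mul1 * Mul2;   // two LEA-style scales
  return IsAdd ? Scaled + 1 : Scaled - 1;      // e.g. (9, 3, true) -> 28
}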
48881 if (ScaleShift >= 1 && ScaleShift < 4) {
48882 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
48903 if (Subtarget.isPMADDWDSlow())
48906 EVT VT = N->getValueType(0);
48919 if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
48964 if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
48968 if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
48976 N->isOnlyUserOf(Op.getNode())) {
48978 if (Src.getScalarValueSizeInBits() == 16)
48983 N->isOnlyUserOf(Op.getNode())) {
48989 SDValue ZeroN0 = GetZeroableOp(N0);
48990 SDValue ZeroN1 = GetZeroableOp(N1);
48991 if (!ZeroN0 && !ZeroN1)
48993 N0 = ZeroN0 ? ZeroN0 : N0;
48994 N1 = ZeroN1 ? ZeroN1 : N1;
49013 EVT VT = N->getValueType(0);
49053 EVT VT = N->getValueType(0);
49065 if (VT != MVT::i64 && VT != MVT::i32 &&
49095 int64_t SignMulAmt = C.getSExtValue();
49096 assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
49097 uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
49100 if (VT == MVT::i64 || VT == MVT::i32) {
49101 if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
49104 if (SignMulAmt < 0)
49112 if ((AbsMulAmt % 9) == 0) {
49114 MulAmt2 = AbsMulAmt / 9;
49115 } else if ((AbsMulAmt % 5) == 0) {
49117   MulAmt2 = AbsMulAmt / 5;
49118 } else if ((AbsMulAmt % 3) == 0) {
49120   MulAmt2 = AbsMulAmt / 3;
49126 (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
49128 if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() &&
49129     N->user_begin()->getOpcode() == ISD::ADD))
49151 if (SignMulAmt < 0)
49153 } else if (!Subtarget.slowLEA())
49164 if (SignMulAmt < 0)
49172 if (SignMulAmt < 0)
49176 } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2) &&
49177     (!VT.isVector() || Subtarget.fastImmVectorShift())) {
49185 } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2) &&
49186     (!VT.isVector() || Subtarget.fastImmVectorShift())) {
49194 } else if (SignMulAmt >= 0 && VT.isVector() &&
49195     Subtarget.fastImmVectorShift()) {
49196 uint64_t AbsMulAmtLowBit = AbsMulAmt & (-AbsMulAmt);
49198 std::optional<unsigned> Opc;
49200 ShiftAmt1 = AbsMulAmt - AbsMulAmtLowBit;
49203 ShiftAmt1 = AbsMulAmt + AbsMulAmtLowBit;
49214 NewMul = DAG.getNode(*Opc, DL, VT, Shift1, Shift2);
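// Sketch of the idea (assumption, not from the original source): when
// AbsMulAmt is a sum or difference of two powers of two, the multiply is
// rebuilt from two shifts and one add/sub, e.g. X * 24 = (X << 4) + (X << 3)
// and X * 28 = (X << 5) - (X << 2).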
49233 "SRL or SRA node is required here!");
49239 SDValue ShiftOperand = N->getOperand(0);
49244 EVT VT = N->getValueType(0);
49257 unsigned ExtOpc = LHS.getOpcode();
49259 RHS.getOpcode() != ExtOpc)
49263 LHS = LHS.getOperand(0);
49264 RHS = RHS.getOperand(0);
49267 EVT MulVT = LHS.getValueType();
49275 return DAG.getNode(ExtOpc, DL, VT, Mulh);
49317 bool MaskOK = false;
49339 if (MaskOK && Mask != 0)
49396 for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
49399 if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
49403 if (SraConst.eq(ShlConst))
49405 if (SraConst.ult(ShlConst))
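// Reading of the loop above (assumption): a (shl X, C1) followed by an
// arithmetic shift right is recognized as a sign extension of an i8/i16/i32
// subvalue when C1 equals the bit width minus the narrow size; SraConst equal
// to ShlConst collapses to a plain sign extension, while a smaller SraConst
// leaves a residual left shift of the extended value.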
49460 auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
49461 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
49462 if (!ShiftC || !AndC)
49468 APInt MaskVal = AndC->getAPIntValue();
49477 APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
49480 if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
49481 (OldMaskSize > 32 && NewMaskSize <= 32)) {
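// Intent (assumption): shrinking the AND mask by the shift amount lets the
// constant fit an 8-bit or 32-bit immediate encoding (the 8/32 size checks
// above), which is why the shift and the mask are reordered here.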
49492 unsigned Opcode = N->getOpcode();
49496 EVT VT = N->getValueType(0);
49521 ShuffleOps[0].getValueType().is256BitVector() &&
49551 if (IsShuf0 || IsShuf1) {
49554 ScaledMask0.assign({0, 1});
49558 ScaledMask1.assign({0, 1});
49562 int PostShuffle[4] = {-1, -1, -1, -1};
49568 if (!LHS || LHS == Src) {
49572 if (!RHS || RHS == Src) {
49579 if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) &&
49580 FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) &&
49581 FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) &&
49582 FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) {
49603 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
49605 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
49612 if ((Op00 == Op11) && (Op01 == Op10)) {
49616 if ((Op00 == Op10) && (Op01 == Op11)) {
49617 const int Map[4] = {0, 2, 1, 3};
49619 {Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
49620 Map[ScaledMask1[1]]});
49637 unsigned Opcode = N->getOpcode();
49639 "Unexpected pack opcode");
49641 EVT VT = N->getValueType(0);
49646 unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
49649 "Unexpected PACKSS/PACKUS input type");
49654 APInt UndefElts0, UndefElts1;
49665 unsigned NumSrcElts = NumDstElts / 2;
49666 unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
49667 unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
49669 APInt Undefs(NumDstElts, 0);
49671 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
49672 for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
49673 unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
49674 auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
49675 auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);
49677 if (UndefElts[SrcIdx]) {
49678   Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
49682 APInt &Val = EltBits[SrcIdx];
49693 if (Val.isIntN(DstBitsPerElt))
49694   Val = Val.trunc(DstBitsPerElt);
49700 Bits[Lane * NumDstEltsPerLane + Elt] = Val;
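// Lane layout note (assumption): PACKSS/PACKUS interleave their two sources
// per 128-bit lane, so destination element (Lane, Elt) reads operand 0 or 1
// depending on whether Elt falls in the low or high half of the lane, at
// SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane; values that
// already fit DstBitsPerElt are truncated directly, anything else takes the
// signed/unsigned saturation path.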
49718 if (Not0 && Not1) {
49736 if (Subtarget.hasVLX())
49762 assert((Src0 || Src1) &&
"Found PACK(UNDEF,UNDEF)");
49790 "Unexpected horizontal add/sub opcode");
49793 MVT VT = N->getSimpleValueType(0);
49798 if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
49799     LHS.getOpcode() == RHS.getOpcode() &&
49800     LHS.getValueType() == RHS.getValueType() &&
49801     N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {
49810 LHS0.isUndef() ? LHS1 : LHS0,
49811 RHS0.isUndef() ? RHS1 : RHS0);
49838 "Unexpected shift opcode");
49839 EVT VT = N->getValueType(0);
49855 EltBits[0].getZExtValue(), DAG);
49869 unsigned Opcode = N->getOpcode();
49872 "Unexpected shift opcode");
49874 EVT VT = N->getValueType(0);
49879 "Unexpected value type");
49888 unsigned ShiftVal = N->getConstantOperandVal(1);
49889 if (ShiftVal >= NumBitsPerElt) {
49892 ShiftVal = NumBitsPerElt - 1;
49912 unsigned NewShiftVal = Amt0 + Amt1;
49913 if (NewShiftVal >= NumBitsPerElt) {
49918 NewShiftVal = NumBitsPerElt - 1;
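// Folding rationale (assumption): two same-type immediate shifts combine by
// adding their amounts; for logical shifts an amount >= NumBitsPerElt folds to
// zero, while for arithmetic shifts it is clamped to NumBitsPerElt - 1, which
// preserves the replicated sign bit.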
49931 return MergeShifts(N0.getOperand(0), ShiftVal, 1);
49934 if (LogicalShift && (ShiftVal % 8) == 0) {
49944 if (Opcode == X86ISD::VSRAI && NumBitsPerElt == 32 && ShiftVal == 31 &&
49963 auto TryConstantFold = [&](SDValue V) {
49971 "Unexpected shift value type");
49975 for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
49976 APInt &Elt = EltBits[i];
49992 if (N->isOnlyUserOf(N0.getNode())) {
49993 if (SDValue C = TryConstantFold(N0))
50022 EVT VT =
N->getValueType(0);
50023 unsigned Opcode =
N->getOpcode();
50027 "Unexpected vector insertion");
50080 if (VT == MVT::f32 || VT == MVT::f64 ||
50081 (VT == MVT::f16 && Subtarget.hasFP16())) {
50082 bool ExpectingFlags = false;
50084 for (const SDNode *U : N->users()) {
50085 if (ExpectingFlags)
50088 switch (U->getOpcode()) {
50093   ExpectingFlags = true;
50103 if (!ExpectingFlags) {
50128 N->getSimpleValueType(0));
50135 MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
50137 if (is64BitFP && !Subtarget.is64Bit()) {
50144 MVT::v2f64, OnesOrZeroesF);
50157 return OneBitOfTruth;
50167 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
50169 MVT VT = N->getSimpleValueType(0);
50199 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
50201 EVT VT = N->getValueType(0);
50208 auto GetNot = [&DAG](
SDValue V) {
50213 if (!SVN || !SVN->hasOneUse() || !SVN->isSplat() ||
50214 !SVN->getOperand(1).isUndef()) {
50221 if (!isa<ConstantSDNode>(IVEN.
getOperand(2)) ||
50328 return DAG.
getNode(
N.getOpcode(),
DL, VT, N0, N1);
50340 EVT VT =
N.getValueType();
50353 switch (
N.getOpcode()) {
50369 default:
llvm_unreachable(
"Unexpected input node for FP logic conversion");
50387 "Unexpected bit opcode");
50399 if (N00Type != N10Type || !((Subtarget.
hasSSE1() && N00Type == MVT::f32) ||
50400 (Subtarget.
hasSSE2() && N00Type == MVT::f64) ||
50401 (Subtarget.hasFP16() && N00Type == MVT::f16)))
50419 if (!Subtarget.
hasAVX() &&
50448 "Unexpected bit opcode");
50480 "Unexpected bit opcode");
50517 "Unexpected bit opcode");
50569 if (
N->getValueType(0) == VT &&
50609 return DAG.
getBitcast(
N->getValueType(0), Shift);
50630 return Subtarget.hasBMI2() &&
50631 (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
50639 MVT VT =
N->getSimpleValueType(0);
50672 MVT VT =
Node->getSimpleValueType(0);
50680 for (unsigned i = 0; i < 2; i++) {
50682 auto *Ld = dyn_cast<LoadSDNode>(Node->getOperand(i));
50685 const Value *MemOp = Ld->getMemOperand()->getValue();
50693 if (auto *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
50694 if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
50695 if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
50698 if (!isa<ConstantDataArray>(Init) ||
50707 uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
50708 bool ConstantsMatch = true;
50709 for (uint64_t j = 0; j < ArrayElementCount; j++) {
50710 auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));
50711 if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
50712   ConstantsMatch = false;
50716 if (!ConstantsMatch)
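// Best-effort reading (assumption): the loop above checks that one operand is
// loaded from a constant global array whose j-th entry is (1 << j) - 1, i.e. a
// lookup table of low-bit masks; when that holds, the AND with the loaded mask
// can be replaced by a direct bit-manipulation sequence instead of the table
// load.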
50749 EVT VT =
N->getValueType(0);
50753 auto *C1 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
50761 if (!Src.hasOneUse())
50766 if (!Src.getOperand(0).hasOneUse())
50768 Src = Src.getOperand(0);
50771 if (Src.getOpcode() !=
ISD::BITCAST || !Src.getOperand(0).hasOneUse())
50774 Src = Src.getOperand(0);
50775 EVT SrcVT = Src.getValueType();
50787 SDValue SubVec = Src.getOperand(0);
50797 auto IsLegalSetCC = [&](
SDValue V) {
50800 EVT SetccVT = V.getOperand(0).getValueType();
50829 static constexpr unsigned kMaxDepth = 2;
50832 if (!
Op.hasOneUse())
50837 if (
Op.getOpcode() == Opc) {
50839 if (
Depth++ >= kMaxDepth)
50842 for (
unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
50845 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), R,
50846 Op.getOperand(1 - OpIdx));
50852 return DAG.
getNode(Opc,
DL,
Op.getValueType(), OpMustEq,
Op);
50858 return DAG.
getNode(Opc,
DL,
Op.getValueType(), OpMustEq,
Op);
50865 return DAG.
getNode(Opc,
DL,
Op.getValueType(), OpMustEq,
Op);
50872 EVT VT =
N->getValueType(0);
50875 (VT != MVT::i32 && VT != MVT::i64))
50881 for (
unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
50884 N->getOperand(1 - OpIdx), 0))
50913 SDNode *BrCond = *Flag->user_begin();
50916 unsigned CondNo = 2;
50946 if (BrCond != NewBrCond.
getNode())
50967 SDValue SetCC0 =
N->getOperand(0);
50968 SDValue SetCC1 =
N->getOperand(1);
50973 auto GetCombineToOpc = [&](
SDValue V) ->
unsigned {
50975 unsigned Opc =
Op.getOpcode();
50983 unsigned NewOpc = 0;
50987 if (!(NewOpc = GetCombineToOpc(SetCC1))) {
50989 if (!(NewOpc = GetCombineToOpc(SetCC1)))
50999 bool IsOR =
N->getOpcode() ==
ISD::OR;
51025 {Sub.getOperand(0), Sub.getOperand(0),
51026 CFlags, SrcCC, SetCC0.getOperand(1)});
51036 EVT VT =
N->getValueType(0);
51041 if (Subtarget.
hasSSE1() && !Subtarget.
hasSSE2() && VT == MVT::v4i32) {
51049 if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) {
51062 if (VT == MVT::i1) {
51066 SrcOps.
size() == 1) {
51067 unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
51070 if (!Mask && TLI.
isTypeLegal(SrcOps[0].getValueType()))
51074 "Unexpected partial reduction mask");
51100 APInt MulCLowBit = MulC & (-MulC);
51102 (MulCLowBit + MulC).isPowerOf2()) {
51105 assert(MulCLowBitLog != -1 &&
51106 "Isolated lowbit is somehow not a power of 2!");
51130 DAG, DCI, Subtarget))
51174 Src.getOperand(0)->hasOneUse())
51175 Src = Src.getOperand(0);
51176 bool ContainsNOT =
false;
51180 Src = Src.getOperand(0);
51182 ContainsNOT =
true;
51184 if (Src.getOpcode() ==
ISD::SRL &&
51185 !isa<ConstantSDNode>(Src.getOperand(1))) {
51186 SDValue BitNo = Src.getOperand(1);
51187 Src = Src.getOperand(0);
51190 Src = Src.getOperand(0);
51192 ContainsNOT =
true;
51195 if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.
getSizeInBits() >= 32))
51209 auto GetDemandedMasks = [&](
SDValue Op) {
51220 for (
int I = 0;
I != NumElts; ++
I) {
51221 if (UndefElts[
I]) {
51226 }
else if (!EltBits[
I].
isZero()) {
51234 APInt Bits0, Elts0;
51235 APInt Bits1, Elts1;
51236 std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
51237 std::tie(Bits1, Elts1) = GetDemandedMasks(N0);
51250 if (NewN0 || NewN1)
51252 NewN1 ? NewN1 : N1);
51269 return M.isZero() || M.isAllOnes();
51277 for (
unsigned i = 0; i != Scale; ++i) {
51280 int VecIdx = Scale *
Idx + i;
51281 ShuffleMask[VecIdx] = EltBits[i].isZero() ?
SM_SentinelZero : VecIdx;
51285 {SrcVec}, 0, SrcVec, ShuffleMask, {}, 1,
51288 true, DAG, Subtarget))
51306 MVT VT =
N->getSimpleValueType(0);
51308 if (!VT.
isVector() || (EltSizeInBits % 8) != 0)
51318 if (!(Subtarget.hasXOP() ||
useVPTERNLOG(Subtarget, VT) ||
51323 APInt UndefElts0, UndefElts1;
51334 for (
unsigned i = 0, e = EltBits0.
size(); i != e; ++i) {
51336 if (UndefElts0[i] || UndefElts1[i])
51338 if (EltBits0[i] != ~EltBits1[i])
51345 MVT OpSVT = EltSizeInBits <= 32 ? MVT::i32 : MVT::i64;
51409 EVT VT =
N->getValueType(0);
51423 EVT MaskVT = Mask.getValueType();
51440 if (Subtarget.hasVLX())
51460 EVT VT = Cmp.getOperand(0).getValueType();
51485 auto isORCandidate = [](
SDValue N) {
51486 return (
N->getOpcode() ==
ISD::OR &&
N->hasOneUse());
51492 if (!
N->hasOneUse() || !
N->getSimpleValueType(0).bitsGE(MVT::i32) ||
51493 !isORCandidate(
N->getOperand(0)))
51497 auto isSetCCCandidate = [](
SDValue N) {
51502 N->getOperand(1).getValueType().bitsGE(MVT::i32);
51505 SDNode *OR =
N->getOperand(0).getNode();
51511 while (((isORCandidate(
LHS) && isSetCCCandidate(
RHS)) ||
51512 (isORCandidate(
RHS) && isSetCCCandidate(
LHS)))) {
51515 LHS = OR->getOperand(0);
51516 RHS = OR->getOperand(1);
51520 if (!(isSetCCCandidate(
LHS) && isSetCCCandidate(
RHS)) ||
51521 !isORCandidate(
SDValue(OR, 0)))
51538 while (!ORNodes.
empty()) {
51540 LHS = OR->getOperand(0);
51541 RHS = OR->getOperand(1);
51560 if (NotOp == And1_R)
51562 if (NotOp != And1_L)
51614 bool ZeroSecondOpOnly = false) {
51620 Y = Y.getOperand(0);
51626 EFLAGS = Y.getOperand(1);
51637 auto *ConstantX = dyn_cast<ConstantSDNode>(X);
51638 if (ConstantX && !ZeroSecondOpOnly) {
51653 !isa<ConstantSDNode>(EFLAGS.
getOperand(1))) {
51676 if (ZeroSecondOpOnly)
51688 !isa<ConstantSDNode>(EFLAGS.
getOperand(1))) {
51718 !isa<ConstantSDNode>(EFLAGS.
getOperand(1))) {
51738 EVT ZVT = Z.getValueType();
51797 bool IsSub = N->getOpcode() == ISD::SUB;
51800 EVT VT = N->getValueType(0);
51828 if (auto *N1C = dyn_cast<ConstantSDNode>(N1)) {
51830 bool N1COdd = N1C->getZExtValue() & 1;
51831 if (IsSub ? N1COdd : !N1COdd)
51856 EVT VT = N->getValueType(0);
51861 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
51870 if (VT == MVT::i1) {
51874 SrcOps.size() == 1) {
51875 unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
51878 if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
51882 "Unexpected partial reduction mask");
51904 DAG, DCI, Subtarget))
51920 if ((VT == MVT::i32 || VT == MVT::i64) &&
51928 if (
auto *CN = dyn_cast<ConstantSDNode>(N1)) {
51929 uint64_t Val = CN->getZExtValue();
51930 if (Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || Val == 8) {
51950 unsigned HalfElts = NumElts / 2;
51987 for (
int I = 0;
I != NumElts; ++
I)
51988 if (!EltBits[
I].isAllOnes())
51993 if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
52017 EVT ResultType =
N->getValueType(0);
52018 if (ResultType != MVT::i8 && ResultType != MVT::i1)
52039 if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
52043 if (!isa<ConstantSDNode>(Shift.
getOperand(1)) ||
52059 if (SetCCResultType != ResultType)
52073 EVT VT =
N->getValueType(0);
52083 case MVT::v2i64:
if (!Subtarget.
hasSSE2())
return SDValue();
break;
52087 case MVT::v4i64:
if (!Subtarget.
hasAVX2())
return SDValue();
break;
52130 EVT InVT = In.getValueType();
52134 "Unexpected types for truncate operation");
52170 unsigned NumSrcBits = In.getScalarValueSizeInBits();
52171 assert(NumSrcBits > NumDstBits &&
"Unexpected types for truncate operation");
52173 APInt SignedMax, SignedMin;
52201 EVT InVT = In.getValueType();
52209 InVT == MVT::v16i32 && VT == MVT::v16i8) {
52213 DL, DAG, Subtarget);
52214 assert(Mid &&
"Failed to pack!");
52225 bool PreferAVX512 = ((Subtarget.
hasAVX512() && InSVT == MVT::i32) ||
52226 (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
52233 (SVT == MVT::i8 || SVT == MVT::i16) &&
52234 (InSVT == MVT::i16 || InSVT == MVT::i32)) {
52237 if (SVT == MVT::i8 && InSVT == MVT::i32) {
52241 assert(Mid &&
"Failed to pack!");
52244 assert(V &&
"Failed to pack!");
52246 }
else if (SVT == MVT::i8 || Subtarget.
hasSSE41())
52257 Subtarget.
hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) &&
52258 (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
52259 unsigned TruncOpc = 0;
52274 ResElts *= NumConcats;
52276 ConcatOps[0] = SatVal;
52298 auto *Ld = cast<LoadSDNode>(
N);
52301 SDValue Chain = Ld->getChain();
52314 auto MatchingBits = [](
const APInt &Undefs,
const APInt &UserUndefs,
52319 if (UserUndefs[
I] || Bits[
I] != UserBits[
I])
52328 auto *UserLd = dyn_cast<MemSDNode>(
User);
52329 if (
User !=
N && UserLd &&
52333 UserLd->getChain() == Chain && !
User->hasAnyUseOfValue(1) &&
52334 User->getValueSizeInBits(0).getFixedValue() >
52336 EVT UserVT =
User->getValueType(0);
52337 SDValue UserPtr = UserLd->getBasePtr();
52342 if (UserC && UserPtr !=
Ptr) {
52346 APInt Undefs, UserUndefs;
52353 UserUndefs, UserBits)) {
52354 if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
52372 auto *Ld = cast<LoadSDNode>(
N);
52374 EVT MemVT = Ld->getMemoryVT();
52385 ((Ld->isNonTemporal() && !Subtarget.
hasInt256() &&
52386 Ld->getAlign() >=
Align(16)) ||
52388 *Ld->getMemOperand(), &
Fast) &&
52394 unsigned HalfOffset = 16;
52395 SDValue Ptr1 = Ld->getBasePtr();
52401 DAG.
getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
52402 Ld->getOriginalAlign(),
52405 Ld->getPointerInfo().getWithOffset(HalfOffset),
52406 Ld->getOriginalAlign(),
52422 SDValue IntLoad = DAG.
getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
52423 Ld->getPointerInfo(),
52424 Ld->getOriginalAlign(),
52436 SDValue Chain = Ld->getChain();
52438 auto *UserLd = dyn_cast<MemSDNode>(
User);
52439 if (
User !=
N && UserLd &&
52441 UserLd->getChain() == Chain && UserLd->getBasePtr() ==
Ptr &&
52442 UserLd->getMemoryVT().getSizeInBits() == MemVT.
getSizeInBits() &&
52443 !
User->hasAnyUseOfValue(1) &&
52444 User->getValueSizeInBits(0).getFixedValue() >
52458 unsigned AddrSpace = Ld->getAddressSpace();
52465 return DAG.
getExtLoad(Ext, dl, RegVT, Ld->getChain(), Cast,
52466 Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
52485 auto *BV = dyn_cast<BuildVectorSDNode>(V);
52489 int TrueIndex = -1;
52491 for (
unsigned i = 0; i < NumElts; ++i) {
52495 auto *ConstNode = dyn_cast<ConstantSDNode>(
Op);
52498 if (ConstNode->getAPIntValue().countr_one() >= 1) {
52500 if (TrueIndex >= 0)
52517 if (TrueMaskElt < 0)
52525 if (TrueMaskElt != 0) {
52545 assert(
ML->isUnindexed() &&
"Unexpected indexed masked load!");
52559 EVT VT =
ML->getValueType(0);
52563 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
52570 ML->getPointerInfo().getWithOffset(
Offset),
52571 Alignment,
ML->getMemOperand()->getFlags());
52579 return DCI.
CombineTo(
ML, Insert, Load.getValue(1),
true);
52585 assert(
ML->isUnindexed() &&
"Unexpected indexed masked load!");
52590 EVT VT =
ML->getValueType(0);
52599 if (LoadFirstElt && LoadLastElt) {
52601 ML->getMemOperand());
52603 ML->getPassThru());
52613 if (
ML->getPassThru().isUndef())
52622 VT,
DL,
ML->getChain(),
ML->getBasePtr(),
ML->getOffset(),
ML->getMask(),
52623 DAG.
getUNDEF(VT),
ML->getMemoryVT(),
ML->getMemOperand(),
52624 ML->getAddressingMode(),
ML->getExtensionType());
52626 ML->getPassThru());
52634 auto *Mld = cast<MaskedLoadSDNode>(
N);
52637 if (Mld->isExpandingLoad())
52653 SDValue Mask = Mld->getMask();
52654 if (Mask.getScalarValueSizeInBits() != 1) {
52655 EVT VT = Mld->getValueType(0);
52666 VT,
SDLoc(
N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
52667 NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
52668 Mld->getAddressingMode(), Mld->getExtensionType());
52696 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
52725 return ScalarStore;
52730 if (Mask.getScalarValueSizeInBits() != 1) {
52773 StoredVal = DAG.
getBitcast(NewVT, StoredVal);
52782 if (VT == MVT::v1i1 && VT == StVT && Subtarget.
hasAVX512() &&
52795 if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
52800 Ops[0] = StoredVal;
52808 if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
52809 VT == MVT::v64i1) && VT == StVT && TLI.
isTypeLegal(VT) &&
52812 if (!DCI.
isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
52814 StoredVal->
ops().slice(0, 32));
52817 StoredVal->
ops().slice(32, 32));
52842 if ((VT == MVT::f16 || VT == MVT::bf16 || VT == MVT::f32 || VT == MVT::f64) &&
52852 SignMask = ~SignMask;
52893 MVT NTVT = Subtarget.hasSSE4A()
52895 : (TLI.
isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
52927 auto IsExtractedElement = [](
SDValue V) {
52929 V = V.getOperand(0);
52930 unsigned Opc = V.getOpcode();
52933 V.getOperand(0).hasOneUse())
52934 return V.getOperand(0);
52937 if (
SDValue Extract = IsExtractedElement(StoredVal)) {
52942 MVT SrcVT = Src.getSimpleValueType();
52991 if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
52992 Subtarget.hasCF() && St->
isSimple()) {
53002 auto *Ld = dyn_cast<LoadSDNode>(St->
getChain());
53003 if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->
getBasePtr())
53006 bool InvertCC =
false;
53037 bool NoImplicitFloatOps =
F.hasFnAttribute(Attribute::NoImplicitFloat);
53039 !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.
hasSSE2();
53041 if (!F64IsLegal || Subtarget.is64Bit())
53044 if (VT == MVT::i64 && isa<LoadSDNode>(St->
getValue()) &&
53047 auto *Ld = cast<LoadSDNode>(St->
getValue());
53060 Ld->getBasePtr(), Ld->getMemOperand());
53074 if (VT == MVT::i64 &&
53094 auto *St = cast<MemIntrinsicSDNode>(
N);
53096 SDValue StoredVal =
N->getOperand(1);
53098 EVT MemVT = St->getMemoryVT();
53129 bool IsCommutative,
53131 bool ForceHorizOp) {
53133 if (
LHS.isUndef() ||
RHS.isUndef())
53145 MVT VT =
LHS.getSimpleValueType();
53147 "Unsupported vector type for horizontal add/sub");
53152 bool UseSubVector =
false;
53154 Op.getOperand(0).getValueType().is256BitVector() &&
53156 Op =
Op.getOperand(0);
53157 UseSubVector =
true;
53167 if (!UseSubVector && SrcOps.
size() <= 2 &&
53173 if (UseSubVector && SrcOps.
size() == 1 &&
53177 ShuffleMask.
assign(Mask.begin(), Mask.end());
53189 GetShuffle(
LHS,
A,
B, LMask);
53195 GetShuffle(
RHS,
C,
D, RMask);
53198 unsigned NumShuffles = (LMask.
empty() ? 0 : 1) + (RMask.
empty() ? 0 : 1);
53199 if (NumShuffles == 0)
53202 if (LMask.
empty()) {
53204 for (
unsigned i = 0; i != NumElts; ++i)
53208 if (RMask.
empty()) {
53210 for (
unsigned i = 0; i != NumElts; ++i)
53232 if (!(
A ==
C &&
B ==
D))
53235 PostShuffleMask.
clear();
53245 unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
53246 unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
53247 assert((NumEltsPer128BitChunk % 2 == 0) &&
53248 "Vector type should have an even number of elements in each lane");
53249 for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
53250 for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
53252 int LIdx = LMask[i + j], RIdx = RMask[i + j];
53253 if (LIdx < 0 || RIdx < 0 ||
53254     (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
53255     (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
53260 if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
53261     !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
53266 int Base = LIdx & ~1u;
53267 int Index = ((Base % NumEltsPer128BitChunk) / 2) +
53268     ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));
53273 if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
53274   Index += NumEltsPer64BitChunk;
53275 PostShuffleMask[i + j] = Index;
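// Pairing rule (assumption): a horizontal add/sub consumes adjacent element
// pairs within each 128-bit chunk, so LIdx/RIdx must form an (even, even+1)
// pair; Base/Index then remap that pair to the slot the HADD/HSUB result will
// occupy, recorded in PostShuffleMask so any residual permutation can be
// applied afterwards.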
53282 bool IsIdentityPostShuffle =
53284 if (IsIdentityPostShuffle)
53285 PostShuffleMask.
clear();
53295 return User->getOpcode() == HOpcode &&
User->getValueType(0) == VT;
53303 if (!ForceHorizOp &&
53305 (NumShuffles < 2 || !IsIdentityPostShuffle),
53317 EVT VT =
N->getValueType(0);
53318 unsigned Opcode =
N->getOpcode();
53322 auto MergableHorizOp = [
N](
unsigned HorizOpcode) {
53323 return N->hasOneUse() &&
53325 (
N->user_begin()->getOperand(0).getOpcode() == HorizOpcode ||
53326 N->user_begin()->getOperand(1).getOpcode() == HorizOpcode);
53332 if ((Subtarget.
hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
53333 (Subtarget.
hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
53338 PostShuffleMask, MergableHorizOp(HorizOpcode))) {
53340 if (!PostShuffleMask.
empty())
53342 DAG.
getUNDEF(VT), PostShuffleMask);
53349 if (Subtarget.
hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
53350 VT == MVT::v16i16 || VT == MVT::v8i32)) {
53355 PostShuffleMask, MergableHorizOp(HorizOpcode))) {
53361 {
LHS,
RHS}, HOpBuilder);
53362 if (!PostShuffleMask.
empty())
53364 DAG.
getUNDEF(VT), PostShuffleMask);
53389 EVT VT =
N->getValueType(0);
53392 int CombineOpcode =
53394 auto combineConjugation = [&](
SDValue &r) {
53397 if (XOR->getOpcode() ==
ISD::XOR && XOR.hasOneUse()) {
53400 APInt ConjugationInt32 =
APInt(32, 0x80000000);
53401 APInt ConjugationInt64 =
APInt(64, 0x8000000080000000ULL);
53418 if (combineConjugation(Res))
53421 if (combineConjugation(Res))
53430 auto AllowContract = [&DAG](
const SDNodeFlags &Flags) {
53432 Flags.hasAllowContract();
53435 auto HasNoSignedZero = [&DAG](
const SDNodeFlags &Flags) {
53437 Flags.hasNoSignedZeros();
53439 auto IsVectorAllNegativeZero = [&DAG](
SDValue Op) {
53442 return Bits.getBitWidth() == 32 && Bits.isConstant() &&
53443 Bits.getConstant() == AI;
53446 if (
N->getOpcode() !=
ISD::FADD || !Subtarget.hasFP16() ||
53447 !AllowContract(
N->getFlags()))
53450 EVT VT =
N->getValueType(0);
53451 if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
53457 SDValue FAddOp1, MulOp0, MulOp1;
53458 auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
53459 &IsVectorAllNegativeZero,
53460 &HasNoSignedZero](
SDValue N) ->
bool {
53474 HasNoSignedZero(Op0->
getFlags())) ||
53475 IsVectorAllNegativeZero(Op0->
getOperand(2)))) {
53485 if (GetCFmulFrom(
LHS))
53487 else if (GetCFmulFrom(
RHS))
53498 DAG.
getNode(NewOp,
SDLoc(
N), CVT, MulOp0, MulOp1, FAddOp1,
N->getFlags());
53516 EVT VT =
N->getValueType(0);
53518 EVT SrcVT = Src.getValueType();
53521 if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 ||
53522 SrcVT != MVT::v2f32)
53540 unsigned SrcOpcode = Src.getOpcode();
53543 EVT VT =
N->getValueType(0);
53544 EVT SrcVT = Src.getValueType();
53551 unsigned Opcode =
Op.getOpcode();
53554 Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
53568 return DAG.
getNode(SrcOpcode,
DL, VT, Trunc0, Trunc1);
53572 if (!Src.hasOneUse())
53583 switch (SrcOpcode) {
53590 return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
53597 SDValue Op0 = Src.getOperand(0);
53598 SDValue Op1 = Src.getOperand(1);
53601 return TruncateArithmetic(Op0, Op1);
53629 EVT InVT = Src.getValueType();
53643 auto IsSext = [&DAG](
SDValue V) {
53646 auto IsZext = [&DAG](
SDValue V) {
53650 bool IsSigned = IsSext(
LHS) && IsSext(
RHS);
53651 bool IsUnsigned = IsZext(
LHS) && IsZext(
RHS);
53652 if (!IsSigned && !IsUnsigned)
53656 auto isOpTruncateFree = [](
SDValue Op) {
53659 return Op.getOperand(0).getScalarValueSizeInBits() <= 16;
53662 bool IsTruncateFree = isOpTruncateFree(
LHS) && isOpTruncateFree(
RHS);
53669 if (IsUnsigned && !IsTruncateFree && Subtarget.
hasInt256() &&
53671 (InSizeInBits % 16) == 0) {
53703 if (ScalarVT != MVT::i16 || NumElems < 8 || !
isPowerOf2_32(NumElems))
53766 for (unsigned i = 0; i != NumElems; ++i) {
53777 auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
53778 auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
53779 auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
53780 auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
53781 if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
53783 unsigned IdxN00 = ConstN00Elt->getZExtValue();
53784 unsigned IdxN01 = ConstN01Elt->getZExtValue();
53785 unsigned IdxN10 = ConstN10Elt->getZExtValue();
53786 unsigned IdxN11 = ConstN11Elt->getZExtValue();
53788 if (IdxN00 > IdxN10) {
53793 if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
53794     IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
53805 if (ZExtIn != N00In || SExtIn != N01In ||
53806     ZExtIn != N10In || SExtIn != N11In)
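// Pattern being matched (assumption): output element i must equal
// zext(A[2*i]) * sext(B[2*i]) + zext(A[2*i+1]) * sext(B[2*i+1]), i.e. the
// adjacent-pair multiply-accumulate of one unsigned and one signed i8 source,
// which is what PMADDUBSW computes; the index checks above enforce the
// 2*i / 2*i+1 placement and that all four extracts come from the same two
// inputs.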
53810 auto ExtractVec = [&DAG, &
DL, NumElems](
SDValue &Ext) {
53811 EVT ExtVT = Ext.getValueType();
53818 ExtractVec(ZExtIn);
53819 ExtractVec(SExtIn);
53825 EVT InVT = Ops[0].getValueType();
53827 "Unexpected scalar element type");
53839 EVT VT =
N->getValueType(0);
53861 if (Src.getOpcode() ==
ISD::BITCAST && VT == MVT::i32) {
53862 SDValue BCSrc = Src.getOperand(0);
53877 EVT VT =
N->getValueType(0);
53905 return N->getOperand(0);
53911 unsigned ScalarSize =
N->getValueType(0).getScalarSizeInBits();
53914 EVT VT =
Op->getValueType(0);
53920 unsigned Opc =
Op.getOpcode();
53925 if (!
Op.getOperand(1).isUndef())
53928 if (NegOp0.getValueType() == VT)
53930 cast<ShuffleVectorSDNode>(
Op)->getMask());
53943 NegInsVal,
Op.getOperand(2));
53966 for (
unsigned I = 0, E = EltBits.
size();
I < E;
I++)
53967 if (!UndefElts[
I] && !EltBits[
I].isSignMask())
54052 EVT OrigVT =
N->getValueType(0);
54087 bool LegalOperations,
54090 unsigned Depth)
const {
54097 EVT VT =
Op.getValueType();
54099 unsigned Opc =
Op.getOpcode();
54111 !(SVT == MVT::f32 || SVT == MVT::f64) ||
54117 if (!Flags.hasNoSignedZeros())
54123 for (
int i = 0; i != 3; ++i)
54125 Op.getOperand(i), DAG, LegalOperations, ForCodeSize,
Depth + 1);
54127 bool NegA = !!NewOps[0];
54128 bool NegB = !!NewOps[1];
54129 bool NegC = !!NewOps[2];
54138 NewOps[i] =
Op.getOperand(i);
54155 MVT VT =
N->getSimpleValueType(0);
54168 unsigned IntOpcode;
54169 switch (
N->getOpcode()) {
54201 "Invalid opcode for combing with CTLZ");
54202 if (Subtarget.hasFastLZCNT())
54205 EVT VT =
N->getValueType(0);
54206 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 &&
54207 (VT != MVT::i64 || !Subtarget.is64Bit()))
54223 }
else if (
N->getOpcode() ==
ISD::SUB) {
54232 auto *
C = dyn_cast<ConstantSDNode>(OpSizeTM1);
54240 if (VT == MVT::i8) {
54259 EVT VT =
N->getValueType(0);
54263 if (Subtarget.
hasSSE1() && !Subtarget.
hasSSE2() && VT == MVT::v4i32) {
54283 DAG, DCI, Subtarget))
54329 auto *N1C = dyn_cast<ConstantSDNode>(N1);
54330 auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.
getOperand(1));
54331 if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {
54349 EVT VT =
N->getValueType(0);
54354 EVT SrcVT = Src.getValueType();
54361 for (
unsigned I = 0;
I != NumElts; ++
I)
54362 ReverseMask[
I] = (NumElts - 1) -
I;
54376 unsigned Opcode =
N->getOpcode();
54379 EVT VT =
N->getValueType(0);
54400 EVT VT =
N->getValueType(0);
54429 if (V.getValueType().isVector())
54439 EVT VT =
N->getValueType(0);
54443 if (!((VT == MVT::f32 && Subtarget.
hasSSE1()) ||
54444 (VT == MVT::f64 && Subtarget.
hasSSE2()) ||
54445 (VT == MVT::v4f32 && Subtarget.
hasSSE1() && !Subtarget.
hasSSE2())))
54448 auto isAllOnesConstantFP = [](
SDValue V) {
54449 if (V.getSimpleValueType().isVector())
54451 auto *
C = dyn_cast<ConstantFPSDNode>(V);
54452 return C &&
C->getConstantFPValue()->isAllOnesValue();
54488 return N->getOperand(1);
54505 return N->getOperand(1);
54509 return N->getOperand(0);
54528 unsigned NewOp = 0;
54529 switch (
N->getOpcode()) {
54536 N->getOperand(0),
N->getOperand(1));
54541 EVT VT =
N->getValueType(0);
54542 if (Subtarget.useSoftFloat() ||
isSoftF16(VT, Subtarget))
54547 if (!((Subtarget.
hasSSE1() && VT == MVT::f32) ||
54548 (Subtarget.
hasSSE2() && VT == MVT::f64) ||
54549 (Subtarget.hasFP16() && VT == MVT::f16) ||
54561 return DAG.
getNode(MinMaxOp,
DL, VT, Op0, Op1,
N->getFlags());
54566 return DAG.
getNode(MinMaxOp,
DL, VT, Op0, Op1,
N->getFlags());
54568 return DAG.
getNode(MinMaxOp,
DL, VT, Op1, Op0,
N->getFlags());
54602 return DAG.
getSelect(
DL, VT, IsOp0Nan, Op1, MinOrMax);
54607 EVT VT =
N->getValueType(0);
54616 MVT InVT = In.getSimpleValueType();
54620 LoadSDNode *LN = cast<LoadSDNode>(
N->getOperand(0));
54642 EVT VT =
N->getValueType(0);
54645 SDValue In =
N->getOperand(IsStrict ? 1 : 0);
54646 MVT InVT = In.getSimpleValueType();
54658 DAG.
getNode(
N->getOpcode(), dl, {VT, MVT::Other},
54659 {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
54681 MVT VT =
N->getSimpleValueType(0);
54713 EVT SrcVT = Src.getValueType();
54721 APInt Undefs0, Undefs1;
54730 for (
int I = 0;
I != NumElts; ++
I)
54731 ResultBits.
push_back(~EltBits0[
I] & EltBits1[
I]);
54741 for (
APInt &Elt : EltBits0)
54757 auto GetDemandedMasks = [&](
SDValue Op,
bool Invert =
false) {
54766 for (
int I = 0;
I != NumElts; ++
I) {
54767 if (UndefElts[
I]) {
54772 }
else if ((Invert && !EltBits[
I].isAllOnes()) ||
54773 (!Invert && !EltBits[
I].
isZero())) {
54781 APInt Bits0, Elts0;
54782 APInt Bits1, Elts1;
54783 std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
54784 std::tie(Bits1, Elts1) = GetDemandedMasks(N0,
true);
54840 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
54842 if (
N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
54853 LoadSDNode *LN = cast<LoadSDNode>(
N->getOperand(IsStrict ? 1 : 0));
54858 N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
54859 {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
54881 EVT DstVT =
N->getValueType(0);
54885 EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
54887 if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
54891 SDValue IntermediateBitwidthOp;
54894 IntermediateBitwidthOp = N0;
54906 if (!isa<ConstantSDNode>(CMovOp0.
getNode()) ||
54907 !isa<ConstantSDNode>(CMovOp1.
getNode()))
54913 if (IntermediateBitwidthOp) {
54914 unsigned IntermediateOpc = IntermediateBitwidthOp.
getOpcode();
54915 CMovOp0 = DAG.
getNode(IntermediateOpc,
DL, DstVT, CMovOp0);
54916 CMovOp1 = DAG.
getNode(IntermediateOpc,
DL, DstVT, CMovOp1);
54922 EVT CMovVT = DstVT;
54924 if (DstVT == MVT::i16) {
54933 if (CMovVT != DstVT)
54946 EVT VT =
N->getValueType(0);
54949 EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
54993 EVT VT = Ext->getValueType(0);
54994 if (VT != MVT::i64)
55004 bool NSW =
Add->getFlags().hasNoSignedWrap();
55005 bool NUW =
Add->getFlags().hasNoUnsignedWrap();
55011 if ((Sext && !NSW) || (!Sext && !NUW))
55017 auto *AddOp1C = dyn_cast<ConstantSDNode>(AddOp1);
55026 bool HasLEAPotential =
false;
55027 for (
auto *
User : Ext->users()) {
55029 HasLEAPotential =
true;
55033 if (!HasLEAPotential)
55037 int64_t AddC = Sext ? AddOp1C->getSExtValue() : AddOp1C->getZExtValue();
55044 Flags.setNoSignedWrap(NSW);
55045 Flags.setNoUnsignedWrap(NUW);
55067 unsigned ExtendOpcode = Extend->
getOpcode();
55074 if (!isa<ConstantSDNode>(CMovOp0.
getNode()) ||
55075 !isa<ConstantSDNode>(CMovOp1.
getNode()))
55079 if (TargetVT != MVT::i32 && TargetVT != MVT::i64)
55084 if (VT != MVT::i16 && !(ExtendOpcode ==
ISD::SIGN_EXTEND && VT == MVT::i32))
55089 EVT ExtendVT = TargetVT;
55091 ExtendVT = MVT::i32;
55093 CMovOp0 = DAG.
getNode(ExtendOpcode,
DL, ExtendVT, CMovOp0);
55094 CMovOp1 = DAG.
getNode(ExtendOpcode,
DL, ExtendVT, CMovOp1);
55100 if (ExtendVT != TargetVT)
55101 Res = DAG.
getNode(ExtendOpcode,
DL, TargetVT, Res);
55111 EVT VT =
N->getValueType(0);
55120 if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
55121 SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
55156 EVT VT =
N->getValueType(0);
55164 bool ReplaceOtherUses = !N0.
hasOneUse();
55167 if (ReplaceOtherUses) {
55186 DAG, DCI, Subtarget))
55211 "ConstantFP build vector expected");
55223 EVT VT = V.getValueType();
55225 for (
const SDValue &
Op : V->op_values()) {
55226 if (
auto *Cst = dyn_cast<ConstantFPSDNode>(
Op)) {
55247 for (
const SDValue &
Op : V->op_values()) {
55248 if (
auto *Cst = dyn_cast<ConstantFPSDNode>(
Op)) {
55249 if (Cst->isNegative())
55261 EVT VT =
N->getValueType(0);
55263 bool IsStrict =
N->isTargetOpcode()
55265 :
N->isStrictFPOpcode();
55272 SDValue A =
N->getOperand(IsStrict ? 1 : 0);
55273 SDValue B =
N->getOperand(IsStrict ? 2 : 1);
55274 SDValue C =
N->getOperand(IsStrict ? 3 : 2);
55279 if (!IsStrict && Flags.hasAllowReassociation() &&
55286 if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
55288 !(ScalarVT == MVT::f16 && Subtarget.hasFP16()) &&
55289 !(ScalarVT == MVT::bf16 && Subtarget.hasAVX10_2()))
55292 auto invertIfNegative = [&DAG, &TLI, &DCI](
SDValue &V) {
55304 SDValue Vec = V.getOperand(0);
55306 Vec, DAG, LegalOperations, CodeSize)) {
55308 NegV, V.getOperand(1));
55324 bool NegA = invertIfNegative(
A);
55325 bool NegB = invertIfNegative(
B);
55326 bool NegC = invertIfNegative(
C);
55328 if (!NegA && !NegB && !NegC)
55331 unsigned NewOpcode =
55337 assert(
N->getNumOperands() == 4 &&
"Shouldn't be greater than 4");
55338 return DAG.
getNode(NewOpcode, dl, {VT, MVT::Other},
55339 {
N->getOperand(0),
A,
B,
C});
55341 if (
N->getNumOperands() == 4)
55342 return DAG.
getNode(NewOpcode, dl, VT,
A,
B,
C,
N->getOperand(3));
55343 return DAG.
getNode(NewOpcode, dl, VT,
A,
B,
C);
55352 EVT VT =
N->getValueType(0);
55365 if (
N->getNumOperands() == 4)
55366 return DAG.
getNode(NewOpcode, dl, VT,
N->getOperand(0),
N->getOperand(1),
55367 NegN2,
N->getOperand(3));
55368 return DAG.
getNode(NewOpcode, dl, VT,
N->getOperand(0),
N->getOperand(1),
55377 EVT VT =
N->getValueType(0);
55385 bool ReplaceOtherUses = !N0.
hasOneUse();
55388 if (ReplaceOtherUses) {
55405 DAG, DCI, Subtarget))
55457 EVT VT =
N->getValueType(0);
55458 EVT OpVT =
LHS.getValueType();
55466 if (VT == MVT::i1) {
55514 LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
55516 EVT SrcVT =
LHS.getOperand(0).getValueType();
55535 if (
auto *
C = dyn_cast<ConstantSDNode>(
RHS)) {
55536 const APInt &CInt =
C->getAPIntValue();
55569 if (IsSEXT0 && IsVZero1) {
55571 "Unexpected operand type");
55580 "Unexpected condition code!");
55590 bool CanMakeSigned =
false;
55616 RHS, DAG,
false,
true))
55647 NewCC,
DL, DAG, Subtarget))
55649 return DAG.
getSetCC(
DL, VT, LHSOut, RHSOut, NewCC);
55723 if (Subtarget.
hasSSE1() && !Subtarget.
hasSSE2() && VT == MVT::v4i32 &&
55724 LHS.getValueType() == MVT::v4f32)
55744 MVT SrcVT = Src.getSimpleValueType();
55745 MVT VT =
N->getSimpleValueType(0);
55749 assert(VT == MVT::i32 && NumElts <= NumBits &&
"Unexpected MOVMSK types");
55758 for (
unsigned Idx = 0;
Idx != NumElts; ++
Idx)
55759 if (!UndefElts[
Idx] && EltBits[
Idx].isNegative())
55768 Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
55806 MVT ShiftVT = SrcVT;
55807 SDValue ShiftLHS = Src.getOperand(0);
55808 SDValue ShiftRHS = Src.getOperand(1);
55812 ShiftLHS = DAG.
getBitcast(ShiftVT, ShiftLHS);
55813 ShiftRHS = DAG.
getBitcast(ShiftVT, ShiftRHS);
55816 ShiftLHS, ShiftAmt, DAG);
55818 ShiftRHS, ShiftAmt, DAG);
55827 if (
N->isOnlyUserOf(Src.getNode())) {
55833 UndefElts, EltBits)) {
55835 for (
unsigned Idx = 0;
Idx != NumElts; ++
Idx) {
55836 if (!UndefElts[
Idx] && EltBits[
Idx].isNegative())
55860 MVT VT =
N->getSimpleValueType(0);
55874 auto *
MemOp = cast<X86MaskedGatherScatterSDNode>(
N);
55878 if (Mask.getScalarValueSizeInBits() != 1) {
55896 if (
auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
55897 SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
55898 Gather->getMask(),
Base, Index, Scale } ;
55900 Gather->getMemoryVT(),
DL, Ops,
55901 Gather->getMemOperand(),
55902 Gather->getIndexType(),
55903 Gather->getExtensionType());
55905 auto *Scatter = cast<MaskedScatterSDNode>(GorS);
55906 SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
55907 Scatter->getMask(),
Base, Index, Scale };
55909 Scatter->getMemoryVT(),
DL,
55910 Ops, Scatter->getMemOperand(),
55911 Scatter->getIndexType(),
55912 Scatter->isTruncatingStore());
55918 auto *GorS = cast<MaskedGatherScatterSDNode>(
N);
55919 SDValue Index = GorS->getIndex();
55921 SDValue Scale = GorS->getScale();
55925 unsigned IndexWidth = Index.getScalarValueSizeInBits();
55934 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Index)) {
55937 EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
55949 Index.getOperand(0).getScalarValueSizeInBits() <= 32 &&
55951 EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
55962 if (Index.getOpcode() ==
ISD::ADD &&
55963 Index.getValueType().getVectorElementType() == PtrVT &&
55964 isa<ConstantSDNode>(Scale)) {
55966 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
55970 if (UndefElts.
none()) {
55972 APInt Adder =
C->getAPIntValue() * ScaleAmt;
55976 Index = Index.getOperand(0);
55988 Index.getOperand(1),
Splat);
55991 Index.getOperand(0),
Splat);
55999 unsigned IndexWidth = Index.getScalarValueSizeInBits();
56002 if (IndexWidth != 32 && IndexWidth != 64) {
56003 MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32;
56004 EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT);
56011 SDValue Mask = GorS->getMask();
56012 if (Mask.getScalarValueSizeInBits() != 1) {
56029 SDValue EFLAGS =
N->getOperand(1);
56042 SDValue EFLAGS =
N->getOperand(3);
56051 N->getOperand(1),
Cond, Flags);
56071 EVT VT =
N->getValueType(0);
56072 bool IsStrict =
N->isStrictFPOpcode();
56074 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56084 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Op0.
getOperand(1))) {
56096 SourceConst = DAG.
getNode(
N->getOpcode(),
DL, {VT, MVT::Other},
56097 {N->getOperand(0), SDValue(BV, 0)});
56133 if (SrcWidth % DestWidth != 0)
56139 unsigned NumElts = VecWidth / DestWidth;
56145 return DAG.
getNode(
N->getOpcode(),
DL,
N->getValueType(0), NewExtElt);
56150 bool IsStrict =
N->isStrictFPOpcode();
56151 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56152 EVT VT =
N->getValueType(0);
56165 if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
56171 (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
56172 : ScalarSize < 32 ? MVT::i32
56178 {
N->getOperand(0),
P});
56194 {
N->getOperand(0),
P});
56205 {
N->getOperand(0), Op0});
56217 bool IsStrict =
N->isStrictFPOpcode();
56222 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56223 EVT VT =
N->getValueType(0);
56236 if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
56242 (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
56243 : ScalarSize < 32 ? MVT::i32
56249 {
N->getOperand(0),
P});
56263 {
N->getOperand(0),
P});
56273 if (NumSignBits >= (
BitWidth - 31)) {
56274 EVT TruncVT = MVT::i32;
56282 {
N->getOperand(0), Trunc});
56287 assert(InVT == MVT::v2i64 &&
"Unexpected VT!");
56293 {
N->getOperand(0), Shuf});
56300 if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
56305 if (VT == MVT::f16 || VT == MVT::f128)
56310 if (Subtarget.hasDQI() && VT != MVT::f80)
56314 Op0.
hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
56315 std::pair<SDValue, SDValue> Tmp =
56336 if (!Subtarget.hasAVX10_2())
56340 EVT SrcVT =
N->getOperand(0).getValueType();
56341 EVT DstVT =
N->getValueType(0);
56344 if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
56349 N->getOperand(0), V2F32Value);
56361 assert(Flags.getValueType() == MVT::i32 &&
"Unexpected VT!");
56365 switch (
User->getOpcode()) {
56396 assert(Flags.getValueType() == MVT::i32 &&
"Unexpected VT!");
56400 switch (
User->getOpcode()) {
56435 EVT VT =
Op.getValueType();
56446 Op.hasOneUse() && isa<ConstantSDNode>(
Op.getOperand(1)) &&
56449 const APInt &ShAmt =
Op.getConstantOperandAPInt(1);
56455 if (Mask.isSignedIntN(32)) {
56472 Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
56473 SDValue BoolVec = Src.getOperand(0);
56474 unsigned ShAmt = 0;
56497 EVT SrcVT = Src.getValueType();
56508 Op =
Op.getOperand(0);
56513 EVT OpVT =
Op.getValueType();
56527 switch (
Op.getOpcode()) {
56532 if (isa<ConstantSDNode>(
Op.getOperand(1)))
56558 Op = DAG.
getNode(NewOpc, dl, VTs, Op0, Op1);
56566 return Op.getValue(1);
56573 "Expected X86ISD::ADD or X86ISD::SUB");
56578 MVT VT =
LHS.getSimpleValueType();
56582 if (IsSub &&
isOneConstant(
N->getOperand(1)) && !
N->hasAnyUseOfValue(0))
56587 if (!
N->hasAnyUseOfValue(1)) {
56600 if (GenericAddSub->hasOneUse() &&
56601 GenericAddSub->user_begin()->isOnlyUserOf(
N))
56608 MatchGeneric(
LHS,
RHS,
false);
56620 SDValue BorrowIn =
N->getOperand(2);
56623 MVT VT =
N->getSimpleValueType(0);
56631 !
N->hasAnyUseOfValue(1))
56633 LHS.getOperand(1), BorrowIn);
56643 SDValue CarryIn =
N->getOperand(2);
56644 auto *LHSC = dyn_cast<ConstantSDNode>(
LHS);
56645 auto *RHSC = dyn_cast<ConstantSDNode>(
RHS);
56655 if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&
56660 EVT VT =
N->getValueType(0);
56673 if (LHSC && RHSC && !LHSC->isZero() && !
N->hasAnyUseOfValue(1)) {
56675 APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue();
56682 MVT VT =
N->getSimpleValueType(0);
56689 if (
LHS.getOpcode() ==
ISD::ADD && RHSC && RHSC->isZero() &&
56690 !
N->hasAnyUseOfValue(1))
56692 LHS.getOperand(1), CarryIn);
56700 using namespace SDPatternMatch;
56745 APInt Idx0L, Idx0H, Idx1L, Idx1H;
56746 SDValue Vec0L, Vec0H, Vec1L, Vec1H;
56762 if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
56763 Idx1H != 2 * i + 3)
56771 Mul.getValueType().getVectorNumElements() != 2 * e)
56775 if (
Mul != Vec0L ||
Mul != Vec1L ||
Mul != Vec0H ||
Mul != Vec1H)
56782 Mode == ShrinkMode::MULU16)
56792 EVT InVT = Ops[0].getValueType();
56810 using namespace SDPatternMatch;
56853 SDValue N00In, N01In, N10In, N11In;
56854 APInt IdxN00, IdxN01, IdxN10, IdxN11;
56866 if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i ||
56867 IdxN11 != 2 * i + 1)
56887 if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
56893 EVT OpVT = Ops[0].getValueType();
56895 "Unexpected scalar element type");
56945 if (!Op0HiZero || !Op1HiZero)
56950 for (
int i = 0; i != (int)NumElts; ++i) {
56951 Mask.push_back(2 * i);
56952 Mask.push_back(2 * (i + NumElts));
56974 auto isSuitableCmov = [](SDValue V) {
56977 if (!isa<ConstantSDNode>(V.getOperand(0)) ||
56978     !isa<ConstantSDNode>(V.getOperand(1)))
56981 (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
56982     V.getConstantOperandAPInt(1).isSignedIntN(32));
56987 SDValue OtherOp =
N->getOperand(1);
56988 if (!isSuitableCmov(Cmov))
56990 if (!isSuitableCmov(Cmov))
56998 EVT VT =
N->getValueType(0);
57008 !isa<ConstantSDNode>(OtherOp.
getOperand(0)) &&
57010 auto *MemNode = dyn_cast<MemSDNode>(Use);
57011 return MemNode && MemNode->getBasePtr().getNode() == N;
57034 EVT VT =
N->getValueType(0);
57091 if (Subtarget.hasVNNI() && Subtarget.
useAVX512Regs() && VT == MVT::v16i32) {
57092 using namespace SDPatternMatch;
57093 SDValue Accum, Lo0, Lo1, Hi0, Hi1;
57129 assert(
Cond.getResNo() == 1 &&
"Unexpected result number");
57143 if (!(TrueOp ==
X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp ==
X))
57157 (FalseOp ==
Cond.getValue(0) || TrueOp ==
Cond.getValue(0)) &&
57178 EVT VT =
N->getValueType(0);
57179 auto *Op0C = dyn_cast<ConstantSDNode>(Op0);
57186 APInt NewImm = Op0C->getAPIntValue() - 1;
57219 cast<MemSDNode>(
N)->getMemoryVT(),
57220 cast<MemSDNode>(
N)->getMemOperand());
57226 EVT VT =
N->getValueType(0);
57231 auto IsNonOpaqueConstant = [&](
SDValue Op) {
57287 unsigned Opcode =
N->getOpcode();
57289 "Unknown PCMP opcode");
57293 MVT VT =
N->getSimpleValueType(0);
57306 APInt LHSUndefs, RHSUndefs;
57313 for (unsigned I = 0; I != NumElts; ++I) {
57315 Results[I] = (LHSBits[I] == RHSBits[I]) ? Ones : Zero;
57317 bool AnyUndef = LHSUndefs[I] || RHSUndefs[I];
57318 Results[I] = (!AnyUndef && LHSBits[I].sgt(RHSBits[I])) ? Ones : Zero;
57331 static std::optional<unsigned>
57333     unsigned NumSignificantBitsRHS) {
57335 assert(SVT == MVT::f32 && "Only tested for float so far");
57338 "Only PCMPEQ/PCMPGT currently supported");
57345 if (FPPrec >= NumSignificantBitsLHS && FPPrec >= NumSignificantBitsRHS)
57348 return std::nullopt;
57358 assert(Subtarget.
hasAVX() &&
"AVX assumed for concat_vectors");
57371 unsigned NumOps = Ops.
size();
57395 (EltSizeInBits >= 32 &&
57411 Op0.
getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())
57460 return Op.getOpcode() == Op0.
getOpcode() &&
Op.hasOneUse();
57480 bool AllConstants =
true;
57481 bool AllSubs =
true;
57484 if (isa<LoadSDNode>(BC0) &&
all_of(SubOps, [&](
SDValue SubOp) {
57488 for (
unsigned I = 0, E = SubOps.size();
I != E; ++
I) {
57498 return AllConstants || AllSubs;
57504 (EltSizeInBits >= 32 || Subtarget.
hasInt256()) &&
57505 (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
57508 for (
int M : cast<ShuffleVectorSDNode>(Ops[0])->getMask()) {
57509 M = M >= NumSubElts ? M + NumSubElts : M;
57512 for (
int M : cast<ShuffleVectorSDNode>(Ops[1])->getMask()) {
57514 M = (M >= NumSubElts ? M + NumSubElts : M) + NumSubElts;
57518 ConcatSubOperand(VT, Ops, 1), NewMask);
57526 if (VT == MVT::v4f64 || VT == MVT::v4i64)
57528 ConcatSubOperand(VT, Ops, 0),
57529 ConcatSubOperand(VT, Ops, 0));
57531 if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.
hasInt256()))
57534 DL, VT, ConcatSubOperand(VT, Ops, 0),
57544 ConcatSubOperand(VT, Ops, 0));
57554 ConcatSubOperand(VT, Ops, 0),
57555 ConcatSubOperand(VT, Ops, 1), Op0.
getOperand(2));
57562 if (!IsSplat && EltSizeInBits >= 32 &&
57572 ConcatSubOperand(VT, Ops, 0),
57573 ConcatSubOperand(VT, Ops, 1));
57583 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57587 if (!IsSplat && EltSizeInBits == 32 &&
57599 if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
57600 uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
57601 uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
57604 ConcatSubOperand(VT, Ops, 0),
57618 ConcatSubOperand(SrcVT, Ops, 0),
57619 ConcatSubOperand(SrcVT, Ops, 1));
57623 if (!IsSplat && NumOps == 2 &&
57628 for (
unsigned i = 0; i != NumOps; ++i) {
57633 for (
int M : SubMask) {
57635 M += i * NumSrcElts;
57639 if (ConcatMask.
size() == (NumOps * NumSrcElts)) {
57641 Ops[1].getOperand(1), DAG,
DL);
57654 for (
unsigned i = 0; i != NumOps; ++i) {
57659 for (
int M : SubMask) {
57661 int Src = M < NumSrcElts ? 0 : 2;
57662 M += M < NumSrcElts ? 0 : NumSrcElts;
57665 if (Ops[0].getOperand(Src) != Ops[i].getOperand(Src))
57666 M += i * NumSrcElts;
57671 if (ConcatMask.
size() == (NumOps * NumSrcElts)) {
57673 Ops[1].getOperand(0), DAG,
DL);
57675 Ops[1].getOperand(2), DAG,
DL);
57685 assert(NumOps == 2 &&
"Bad concat_vectors operands");
57686 unsigned Imm0 = Ops[0].getConstantOperandVal(2);
57687 unsigned Imm1 = Ops[1].getConstantOperandVal(2);
57689 if ((Imm0 & 0x88) == 0 && (Imm1 & 0x88) == 0) {
57690 int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3), (
int)(Imm1 & 0x03),
57691 (
int)((Imm1 >> 4) & 0x3)};
57694 Ops[0].getOperand(1), DAG,
DL);
57696 Ops[1].getOperand(1), DAG,
DL);
57708 unsigned Imm0 = Ops[0].getConstantOperandVal(2);
57709 unsigned Imm1 = Ops[1].getConstantOperandVal(2);
57710 unsigned Imm = ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
57711 ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
57713 Ops[0].getOperand(1), DAG,
DL);
57715 Ops[1].getOperand(1), DAG,
DL);
57723 EVT SrcVT = Ops[0].getOperand(0).getValueType();
57725 SrcVT == Ops[1].getOperand(0).getValueType() &&
57731 ConcatSubOperand(NewSrcVT, Ops, 0));
57739 if (!IsSplat && NumOps == 2 &&
57742 (EltSizeInBits >= 32 || Subtarget.
useBWIRegs())))) {
57743 EVT SrcVT = Ops[0].getOperand(0).getValueType();
57745 SrcVT == Ops[1].getOperand(0).getValueType()) {
57748 ConcatSubOperand(NewSrcVT, Ops, 0));
57756 if (VT == MVT::v4i64 && !Subtarget.
hasInt256() &&
57758 return Op.getConstantOperandAPInt(1) == 32;
57764 {8, 0, 8, 2, 8, 4, 8, 6});
57767 {1, 8, 3, 8, 5, 8, 7, 8});
57778 (EltSizeInBits >= 32 || Subtarget.
useBWIRegs()))) &&
57780 return Op0.getOperand(1) == Op.getOperand(1);
57783 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57791 return Op0.getOperand(1) == Op.getOperand(1);
57794 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57804 ConcatSubOperand(VT, Ops, 0),
57805 ConcatSubOperand(VT, Ops, 1));
57811 (Subtarget.
hasInt256() || VT == MVT::v8i32) &&
57812 (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
57815 ConcatSubOperand(VT, Ops, 0),
57816 ConcatSubOperand(VT, Ops, 1));
57820 unsigned MaxSigBitsLHS = 0, MaxSigBitsRHS = 0;
57821 for (
unsigned I = 0;
I != NumOps; ++
I) {
57823 std::max(MaxSigBitsLHS,
57826 std::max(MaxSigBitsRHS,
57828 if (MaxSigBitsLHS == EltSizeInBits && MaxSigBitsRHS == EltSizeInBits)
57840 if (std::optional<unsigned> CastOpc =
57847 bool IsAlwaysSignaling;
57864 ConcatSubOperand(VT, Ops, 0));
57872 return Op0.getOperand(2) == Op.getOperand(2);
57875 ConcatSubOperand(VT, Ops, 0),
57876 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
57884 (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
57886 ConcatSubOperand(VT, Ops, 0),
57887 ConcatSubOperand(VT, Ops, 1));
57896 if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
57900 ConcatSubOperand(VT, Ops, 0),
57901 ConcatSubOperand(VT, Ops, 1));
57908 ConcatSubOperand(VT, Ops, 0),
57909 ConcatSubOperand(VT, Ops, 1));
57919 ConcatSubOperand(VT, Ops, 0),
57920 ConcatSubOperand(VT, Ops, 1));
57931 ConcatSubOperand(SrcVT, Ops, 0),
57932 ConcatSubOperand(SrcVT, Ops, 1));
57940 return Op0.getOperand(2) == Op.getOperand(2);
57943 ConcatSubOperand(VT, Ops, 0),
57944 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
57949 uint64_t Mask0 = Ops[0].getConstantOperandVal(2);
57950 uint64_t Mask1 = Ops[1].getConstantOperandVal(2);
57953 Mask0 = (Mask0 << 8) | Mask0;
57954 Mask1 = (Mask1 << 8) | Mask1;
57961 return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1),
57962 ConcatSubOperand(VT, Ops, 0));
57966 if (!IsSplat && Subtarget.hasAVX512() &&
57969 (EltSizeInBits >= 32 || Subtarget.hasBWI())) {
57970 EVT SelVT = Ops[0].getOperand(0).getValueType();
57977 ConcatSubOperand(VT, Ops, 1),
57978 ConcatSubOperand(VT, Ops, 2));
57984 (EltSizeInBits >= 32 || Subtarget.hasInt256()) &&
57985 IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
57986 EVT SelVT = Ops[0].getOperand(0).getValueType();
57991 ConcatSubOperand(VT, Ops, 1),
57992 ConcatSubOperand(VT, Ops, 2));
58004 *FirstLd->getMemOperand(), &Fast) &&
58016 for (unsigned I = 0; I != NumOps; ++I) {
58023 EltBits.append(OpEltBits);
58047 auto *Mem = cast<MemSDNode>(Op0);
58078 EVT VT = N->getValueType(0);
58079 EVT SrcVT = N->getOperand(0).getValueType();
58087 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
58090 Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);
58091 if (I == (E - 1)) {
58117 MVT OpVT = N->getSimpleValueType(0);
58123 SDValue SubVec = N->getOperand(1);
58125 uint64_t IdxVal = N->getConstantOperandVal(2);
58158 Ins.getOperand(1).getValueSizeInBits().getFixedValue() <=
58162 Ins.getOperand(1), N->getOperand(2));
58187 if (ExtIdxVal != 0) {
58192 for (int i = 0; i != VecNumElts; ++i)
58195 for (int i = 0; i != SubVecNumElts; ++i)
58196 Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
58214 if (SubVectorOps.size() == 2 &&
58238 auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
58240 SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
58243 MemIntr->getMemoryVT(),
58244 MemIntr->getMemOperand());
58253 auto *VecLd = dyn_cast<LoadSDNode>(Vec);
58254 auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
58255 if (VecLd && SubLd &&
58273 SDValue Sel = Ext->getOperand(0);
58281 MVT VT = Ext->getSimpleValueType(0);
58289 MVT WideVT = Ext->getOperand(0).getSimpleValueType();
58292 "Unexpected vector type with legal operations");
58296 unsigned ExtIdx = Ext->getConstantOperandVal(1);
58297 if (SelElts % CastedElts == 0) {
58300 ExtIdx *= (SelElts / CastedElts);
58301 } else if (CastedElts % SelElts == 0) {
58304 unsigned IndexDivisor = CastedElts / SelElts;
58305 if (ExtIdx % IndexDivisor != 0)
58307 ExtIdx /= IndexDivisor;
58309 llvm_unreachable("Element count of simple vector types are not divisible?");
58313 unsigned NarrowElts = SelElts / NarrowingFactor;
58335 if (!N->getValueType(0).isSimple())
58338 MVT VT = N->getSimpleValueType(0);
58340 unsigned IdxVal = N->getConstantOperandVal(1);
58352 auto isConcatenatedNot = [](SDValue V) {
58356 SDValue NotOp = V->getOperand(0);
58359 if (isConcatenatedNot(InVecBC.getOperand(0)) ||
58420 cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
58424 if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) {
58428 unsigned NumSubVecs = InSizeInBits / SizeInBits;
58432 unsigned SubVecIdx = IdxVal / NumSubElts;
58437 SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
58438 if (Src.getValueSizeInBits() == InSizeInBits) {
58439 unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
58440 unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts;
58447 auto IsExtractFree = [](SDValue V) {
58448 if (V.hasOneUse()) {
58458 return V.isUndef();
58465 if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
58486 Subtarget.hasVLX())) &&
58487 (VT == MVT::v4i32 || VT == MVT::v4f32)) {
58489 if (Src.getValueType().getScalarSizeInBits() == 32)
58495 (SizeInBits == 128 || SizeInBits == 256) &&
58498 if (Ext.getValueSizeInBits() > SizeInBits)
58501 return DAG.getNode(ExtOp, DL, VT, Ext);
58510 return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
58512 if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
58513 (SizeInBits == 128 || SizeInBits == 256)) {
58517 return DAG.getNode(InOpcode, DL, VT, Ext);
58520 if (SizeInBits == 128 || SizeInBits == 256) {
58521 switch (InOpcode) {
58569 DAG.getTargetConstant(M, DL, MVT::i8));
58581 DAG.getNode(InOpcode, DL, InVecVT, Src0, Mask, Src1);
58604 using namespace SDPatternMatch;
58605 EVT VT = N->getValueType(0);
58613 if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() &&
58619 Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
58620 Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
58623 Src.getOperand(1));
58627 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
58628 auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
58629 if (Op.getValueType() != MVT::i64)
58632 if (Op.getOpcode() == Opc &&
58633 Op.getOperand(0).getScalarValueSizeInBits() <= 32)
58634 return Op.getOperand(0);
58636 if (auto *Ld = dyn_cast<LoadSDNode>(Op))
58637 if (Ld->getExtensionType() == Ext &&
58638 Ld->getMemoryVT().getScalarSizeInBits() <= 32)
58661 if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST) {
58664 if (SrcOp.getValueType() == MVT::f64)
58668 if (SrcOp.getValueType() == MVT::x86mmx)
58672 if (VT == MVT::v4i32) {
58677 m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
58689 unsigned BroadcastSizeInBits =
58690 User->getValueSizeInBits(0).getFixedValue();
58691 if (BroadcastSizeInBits == SizeInBits)
58693 if (BroadcastSizeInBits > SizeInBits)
58701 switch (Src.getOpcode()) {
58705 if (auto *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
58712 Amt->getZExtValue(), DAG);
58718 if (auto *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(2))) {
58724 Amt->getAPIntValue().urem(Src.getScalarValueSizeInBits());
58729 return DAG.getNode(Src.getOpcode(), DL, VT, SrcVec0, SrcVec1,
58771 LHS.getOperand(0).getValueType() == MVT::v4i32) {
58774 LHS.getOperand(0), { 0, -1, 1, -1 });
58781 RHS.getOperand(0).getValueType() == MVT::v4i32) {
58784 RHS.getOperand(0), { 0, -1, 1, -1 });
58795 MVT VT = N->getSimpleValueType(0);
58798 unsigned Opc = N->getOpcode();
58801 "Unexpected PMADD opcode");
58810 APInt LHSUndefs, RHSUndefs;
58812 unsigned SrcEltBits = LHS.getScalarValueSizeInBits();
58817 for (unsigned I = 0, E = LHSBits.size(); I != E; I += 2) {
58818 APInt LHSLo = LHSBits[I + 0], LHSHi = LHSBits[I + 1];
58819 APInt RHSLo = RHSBits[I + 0], RHSHi = RHSBits[I + 1];
58820 LHSLo = IsPMADDWD ? LHSLo.sext(DstEltBits) : LHSLo.zext(DstEltBits);
58821 LHSHi = IsPMADDWD ? LHSHi.sext(DstEltBits) : LHSHi.zext(DstEltBits);
58823 APInt Hi = LHSHi * RHSHi.sext(DstEltBits);
58825 Result.push_back(Res);
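For reference, the pairwise multiply-add being constant-folded here follows the PMADDWD semantics: each i32 result lane is the sum of two adjacent products of sign-extended i16 inputs (PMADDUBSW is the analogous unsigned-by-signed i8 form with saturation). A minimal scalar sketch of the PMADDWD case:

#include <array>
#include <cstdint>

// Scalar model of PMADDWD on one 128-bit lane:
// r[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1], with i16 inputs sign-extended to i32.
static std::array<int32_t, 4> pmaddwd(const std::array<int16_t, 8> &a,
                                      const std::array<int16_t, 8> &b) {
  std::array<int32_t, 4> r{};
  for (int i = 0; i != 4; ++i)
    r[i] = int32_t(a[2 * i]) * int32_t(b[2 * i]) +
           int32_t(a[2 * i + 1]) * int32_t(b[2 * i + 1]);
  return r;
}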
58841 EVT VT = N->getValueType(0);
58843 unsigned Opcode = N->getOpcode();
58844 unsigned InOpcode = In.getOpcode();
58851 auto *Ld = cast<LoadSDNode>(In);
58852 if (Ld->isSimple()) {
58853 MVT SVT = In.getSimpleValueType().getVectorElementType();
58860 Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
58861 MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
58869 if (Opcode == InOpcode)
58870 return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
58877 In.getOperand(0).getOperand(0).getValueSizeInBits() ==
58878 In.getValueSizeInBits())
58879 return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
58888 EVT EltVT = In.getOperand(0).getValueType();
58890 for (unsigned I = 0; I != NumElts; ++I)
58891 Elts[I * Scale] = In.getOperand(I);
58908 EVT VT = N->getValueType(0);
58920 SDValue Src = N->getOperand(0).getOperand(0);
58921 uint64_t Amt = N->getConstantOperandVal(1) +
58922 N->getOperand(0).getConstantOperandVal(1);
58923 EVT SrcVT = Src.getValueType();
58945 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C())
58951 if (N->getValueType(0) != MVT::f32 ||
58952 N->getOperand(0).getOperand(0).getValueType() != MVT::f32)
58957 N->getOperand(0).getOperand(0));
58968 EVT VT = N->getValueType(0);
58969 bool IsStrict = N->isStrictFPOpcode();
58970 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
58971 EVT SrcVT = Src.getValueType();
58976 !IsStrict && Src.getOperand(0).getValueType() == VT)
58977 return Src.getOperand(0);
58982 assert(!IsStrict && "Strict FP doesn't support BF16");
58996 if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
58999 if (Subtarget.hasFP16())
59019 unsigned NumConcats = 8 / NumElts;
59029 std::max(4U, NumElts));
59033 {N->getOperand(0), Src});
59040 assert(NumElts == 2 && "Unexpected size");
59066 "Unknown broadcast load type");
59069 if (N->hasAnyUseOfValue(1))
59072 auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
59075 SDValue Chain = MemIntrin->getChain();
59076 EVT VT = N->getSimpleValueType(0);
59077 EVT MemVT = MemIntrin->getMemoryVT();
59082 if (User != N && User->getOpcode() == N->getOpcode() &&
59083 cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
59084 cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
59085 cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
59087 !User->hasAnyUseOfValue(1) &&
59100 if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
59103 bool IsStrict = N->isStrictFPOpcode();
59104 EVT VT = N->getValueType(0);
59105 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
59106 EVT SrcVT = Src.getValueType();
59116 if (Subtarget.hasFP16()) {
59122 Src.getNumOperands() == 2) {
59124 SDValue Op0 = Src.getOperand(0);
59125 SDValue Op1 = Src.getOperand(1);
59132 int Mask[8] = {0, 1, 2, 3, 8, 9, 10, 11};
59134 assert(IsOp0Strict && "Op0 must be strict node");
59138 Cvt0 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
59140 Cvt1 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
59168 {N->getOperand(0), Src, Rnd});
59194 LoadSDNode *LN = cast<LoadSDNode>(Src.getNode());
59212 unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
59226 bool MadeChange = false, CastReturnVal = false;
59228 for (const SDValue &Arg : N->op_values()) {
59231 Args.push_back(DAG.getBitcast(MVT::x86mmx, Arg));
59233 Args.push_back(Arg);
59237 if (VTs.NumVTs > 0 && VTs.VTs[0] == MVT::v1i64) {
59239 NewVTArr[0] = MVT::x86mmx;
59242 CastReturnVal = true;
59247 if (CastReturnVal) {
59249 for (unsigned i = 0, e = Result->getNumValues(); i != e; ++i)
59251 Returns[0] = DAG.getBitcast(MVT::v1i64, Returns[0]);
59263 unsigned IntNo = N->getConstantOperandVal(0);
59277 unsigned IntNo = N->getConstantOperandVal(1);
59291 unsigned IntNo = N->getConstantOperandVal(1);
59303 switch (N->getOpcode()) {
59502 return Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64);
59532 if (VT == MVT::i16) {
59553 return Subtarget.hasNDD();
59566 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
59567 if (IsCFProtectionSupported) {
59606 EVT VT = Op.getValueType();
59607 bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
59608 isa<ConstantSDNode>(Op.getOperand(1));
59614 if (VT != MVT::i16 && !Is8BitMulByConstant)
59618 if (!Op.hasOneUse())
59623 auto *Ld = cast<LoadSDNode>(Load);
59624 auto *St = cast<StoreSDNode>(User);
59625 return Ld->getBasePtr() == St->getBasePtr();
59631 if (!Op.hasOneUse())
59636 auto *Ld = cast<AtomicSDNode>(Load);
59637 auto *St = cast<AtomicSDNode>(User);
59638 return Ld->getBasePtr() == St->getBasePtr();
59642 if (!Op.hasOneUse())
59645 EVT VT = User->getValueType(0);
59647 (VT == MVT::i32 || VT == MVT::i64));
59650 bool Commute = false;
59651 switch (Op.getOpcode()) {
59652 default: return false;
59669 if (Subtarget.hasZU() && IsFoldableZext(Op) &&
59670 (isa<ConstantSDNode>(Op.getOperand(0)) ||
59671 isa<ConstantSDNode>(Op.getOperand(1))))
59685 (!Commute || !isa<ConstantSDNode>(N0) ||
59686 (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
59689 ((Commute && !isa<ConstantSDNode>(N1)) ||
59690 (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
59692 if (IsFoldableAtomicRMW(N0, Op) ||
59693 (Commute && IsFoldableAtomicRMW(N1, Op)))
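Roughly, the IsFoldableRMW/IsFoldableAtomicRMW helpers above keep an i16 operation (or an i8 multiply by constant) at its narrow type when its only use feeds a load/op/store of the same address, because the narrow form can then be selected as a single memory-destination instruction instead of being promoted to i32. A hedged source-level illustration of the pattern they are trying to preserve:

// A load-op-store on a 16-bit value; keeping the add as i16 lets the backend
// emit one memory-destination 'addw $5, (%rdi)' rather than a widened sequence.
static inline void bump(unsigned short *p) {
  *p = (unsigned short)(*p + 5);
}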
59714 S = S.substr(Piece.size());
59727 if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
59732 if (AsmPieces.size() == 3)
59744 const std::string &AsmStr = IA->getAsmString();
59752 SplitString(AsmStr, AsmPieces, ";\n");
59754 switch (AsmPieces.size()) {
59755 default: return false;
59762 if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
59763 matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
59764 matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
59765 matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
59766 matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
59767 matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
59775 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
59776 (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
59777 matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
59779 StringRef ConstraintsStr = IA->getConstraintString();
59788 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
59789 matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
59790 matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
59791 matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
59793 StringRef ConstraintsStr = IA->getConstraintString();
59802 if (Constraints.size() >= 2 &&
59803 Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
59804 Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
59806 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
59807 matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
59808 matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
59854 if (Constraint.size() == 1) {
59855 switch (Constraint[0]) {
59892 else if (Constraint.size() == 2) {
59893 switch (Constraint[0]) {
59897 if (Constraint[1] != 's')
59901 switch (Constraint[1]) {
59915 switch (Constraint[1]) {
59935 Value *CallOperandVal = Info.CallOperandVal;
59938 if (!CallOperandVal)
59942 switch (*Constraint) {
59972 switch (Constraint[1]) {
60004 switch (Constraint[1]) {
60029 if (auto *C = dyn_cast<ConstantInt>(Info.CallOperandVal))
60030 if (C->getZExtValue() <= 31)
60034 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60035 if (C->getZExtValue() <= 63)
60039 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60040 if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
60044 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60045 if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
60049 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60050 if (C->getZExtValue() <= 3)
60054 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60055 if (C->getZExtValue() <= 0xff)
60060 if (isa<ConstantFP>(CallOperandVal))
60064 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60065 if ((C->getSExtValue() >= -0x80000000LL) &&
60066 (C->getSExtValue() <= 0x7fffffffLL))
60070 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60071 if (C->getZExtValue() <= 0xffffffff)
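The single-letter immediate constraints weighed here each accept a specific range: 'I' is [0, 31], 'J' is [0, 63], 'K' is a signed 8-bit value, 'L' is 0xff/0xffff (with 0xffffffff also allowed in 64-bit mode during lowering), 'M' is [0, 3], 'N' is [0, 255], 'O' is [0, 127], 'e' is a signed 32-bit value and 'Z' an unsigned 32-bit value. As a small illustration, a constant that satisfies 'I' can be encoded directly as a shift count:

// Illustration of the 'I' constraint ([0, 31]): the rotate count is emitted as
// an immediate operand; a value outside the range would be rejected.
static inline unsigned rotl_by_8(unsigned x) {
  __asm__("roll %1, %0" : "+r"(x) : "I"(8) : "cc");
  return x;
}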
60123 std::vector<SDValue> &Ops,
60126 char ConstraintLetter = Constraint[0];
60127 switch (ConstraintLetter) {
60130 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60131 if (C->getZExtValue() <= 31) {
60133 Op.getValueType());
60139 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60140 if (C->getZExtValue() <= 63) {
60142 Op.getValueType());
60148 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60149 if (isInt<8>(C->getSExtValue())) {
60151 Op.getValueType());
60157 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60158 if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
60159 (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
60161 Op.getValueType());
60167 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60168 if (C->getZExtValue() <= 3) {
60170 Op.getValueType());
60176 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60177 if (C->getZExtValue() <= 255) {
60179 Op.getValueType());
60185 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60186 if (C->getZExtValue() <= 127) {
60188 Op.getValueType());
60195 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60197 C->getSExtValue())) {
60208 assert(Constraint[1] == 's');
60211 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
60213 BA->getValueType(0)));
60217 isa<ConstantSDNode>(Op->getOperand(1))) {
60218 Offset = cast<ConstantSDNode>(Op->getOperand(1))->getSExtValue();
60219 Op = Op->getOperand(0);
60221 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
60229 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60231 C->getZExtValue())) {
60233 Op.getValueType());
60243 if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
60244 bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
60249 : CST->getSExtValue();
60258 !(isa<BlockAddressSDNode>(Op) || isa<BasicBlockSDNode>(Op)))
60263 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
60273 if (Result.getNode()) {
60274 Ops.push_back(Result);
60314 return Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32();
60317std::pair<unsigned, const TargetRegisterClass *>
60323 if (Constraint.size() == 1) {
60325 switch (Constraint[0]) {
60329 if (Subtarget.is64Bit())
60330 return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
60331 assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
60332 "Expecting 64, 32 or 16 bit subtarget");
60333 return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
60340 if (VT == MVT::v1i1 || VT == MVT::i1)
60341 return std::make_pair(0U, &X86::VK1RegClass);
60342 if (VT == MVT::v8i1 || VT == MVT::i8)
60343 return std::make_pair(0U, &X86::VK8RegClass);
60344 if (VT == MVT::v16i1 || VT == MVT::i16)
60345 return std::make_pair(0U, &X86::VK16RegClass);
60347 if (Subtarget.hasBWI()) {
60348 if (VT == MVT::v32i1 || VT == MVT::i32)
60349 return std::make_pair(0U, &X86::VK32RegClass);
60350 if (VT == MVT::v64i1 || VT == MVT::i64)
60351 return std::make_pair(0U, &X86::VK64RegClass);
60355 if (Subtarget.is64Bit()) {
60356 if (VT == MVT::i8 || VT == MVT::i1)
60358 ? &X86::GR8RegClass
60359 : &X86::GR8_NOREX2RegClass);
60360 if (VT == MVT::i16)
60362 ? &X86::GR16RegClass
60363 : &X86::GR16_NOREX2RegClass);
60364 if (VT == MVT::i32 || VT == MVT::f32)
60366 ? &X86::GR32RegClass
60367 : &X86::GR32_NOREX2RegClass);
60368 if (VT != MVT::f80 && !VT.isVector())
60370 ? &X86::GR64RegClass
60371 : &X86::GR64_NOREX2RegClass);
60377 if (VT == MVT::i8 || VT == MVT::i1)
60378 return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
60379 if (VT == MVT::i16)
60380 return std::make_pair(0U, &X86::GR16_ABCDRegClass);
60381 if (VT == MVT::i32 || VT == MVT::f32 ||
60382 (!VT.isVector() && !Subtarget.is64Bit()))
60383 return std::make_pair(0U, &X86::GR32_ABCDRegClass);
60384 if (VT != MVT::f80 && !VT.isVector())
60385 return std::make_pair(0U, &X86::GR64_ABCDRegClass);
60389 if (VT == MVT::i8 || VT == MVT::i1)
60391 ? &X86::GR8RegClass
60392 : &X86::GR8_NOREX2RegClass);
60393 if (VT == MVT::i16)
60395 ? &X86::GR16RegClass
60396 : &X86::GR16_NOREX2RegClass);
60397 if (VT == MVT::i32 || VT == MVT::f32 ||
60398 (!VT.isVector() && !Subtarget.is64Bit()))
60400 ? &X86::GR32RegClass
60401 : &X86::GR32_NOREX2RegClass);
60402 if (VT != MVT::f80 && !VT.isVector())
60404 ? &X86::GR64RegClass
60405 : &X86::GR64_NOREX2RegClass);
60408 if (VT == MVT::i8 || VT == MVT::i1)
60409 return std::make_pair(0U, &X86::GR8_NOREXRegClass);
60410 if (VT == MVT::i16)
60411 return std::make_pair(0U, &X86::GR16_NOREXRegClass);
60412 if (VT == MVT::i32 || VT == MVT::f32 ||
60413 (!VT.isVector() && !Subtarget.is64Bit()))
60414 return std::make_pair(0U, &X86::GR32_NOREXRegClass);
60415 if (VT != MVT::f80 && !VT.isVector())
60416 return std::make_pair(0U, &X86::GR64_NOREXRegClass);
60422 return std::make_pair(0U, &X86::RFP32RegClass);
60424 return std::make_pair(0U, &X86::RFP64RegClass);
60425 if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
60426 return std::make_pair(0U, &X86::RFP80RegClass);
60429 if (!Subtarget.hasMMX()) break;
60430 return std::make_pair(0U, &X86::VR64RegClass);
60433 if (!Subtarget.hasSSE1()) break;
60434 bool VConstraint = (Constraint[0] == 'v');
60440 if (VConstraint && Subtarget.hasFP16())
60441 return std::make_pair(0U, &X86::FR16XRegClass);
60445 if (VConstraint && Subtarget.hasVLX())
60446 return std::make_pair(0U, &X86::FR32XRegClass);
60447 return std::make_pair(0U, &X86::FR32RegClass);
60450 if (VConstraint && Subtarget.hasVLX())
60451 return std::make_pair(0U, &X86::FR64XRegClass);
60452 return std::make_pair(0U, &X86::FR64RegClass);
60454 if (Subtarget.is64Bit()) {
60455 if (VConstraint && Subtarget.hasVLX())
60456 return std::make_pair(0U, &X86::VR128XRegClass);
60457 return std::make_pair(0U, &X86::VR128RegClass);
60462 if (!Subtarget.hasFP16())
60465 return std::make_pair(0U, &X86::VR128XRegClass);
60466 return std::make_pair(0U, &X86::VR128RegClass);
60468 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60471 return std::make_pair(0U, &X86::VR128XRegClass);
60472 return std::make_pair(0U, &X86::VR128RegClass);
60480 if (VConstraint && Subtarget.hasVLX())
60481 return std::make_pair(0U, &X86::VR128XRegClass);
60482 return std::make_pair(0U, &X86::VR128RegClass);
60485 if (!Subtarget.hasFP16())
60488 return std::make_pair(0U, &X86::VR256XRegClass);
60489 return std::make_pair(0U, &X86::VR256RegClass);
60491 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60494 return std::make_pair(0U, &X86::VR256XRegClass);
60495 return std::make_pair(0U, &X86::VR256RegClass);
60502 if (VConstraint && Subtarget.hasVLX())
60503 return std::make_pair(0U, &X86::VR256XRegClass);
60505 return std::make_pair(0U, &X86::VR256RegClass);
60508 if (!Subtarget.hasFP16())
60511 return std::make_pair(0U, &X86::VR512RegClass);
60512 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60514 if (!Subtarget.hasBF16())
60517 return std::make_pair(0U, &X86::VR512RegClass);
60518 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60527 return std::make_pair(0U, &X86::VR512RegClass);
60528 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60532 } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
60533 switch (Constraint[1]) {
60541 if (!Subtarget.hasMMX()) break;
60542 return std::make_pair(0U, &X86::VR64RegClass);
60544 if (!Subtarget.hasSSE1()) break;
60549 if (!Subtarget.hasFP16())
60551 return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
60554 return std::make_pair(X86::XMM0, &X86::FR32RegClass);
60557 return std::make_pair(X86::XMM0, &X86::FR64RegClass);
60559 if (!Subtarget.hasFP16())
60561 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60563 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60565 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60573 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60576 if (!Subtarget.hasFP16())
60578 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60580 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60582 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60590 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60593 if (!Subtarget.hasFP16())
60595 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60597 if (!Subtarget.hasBF16())
60599 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60607 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60614 if (VT == MVT::v1i1 || VT == MVT::i1)
60615 return std::make_pair(0U, &X86::VK1WMRegClass);
60616 if (VT == MVT::v8i1 || VT == MVT::i8)
60617 return std::make_pair(0U, &X86::VK8WMRegClass);
60618 if (VT == MVT::v16i1 || VT == MVT::i16)
60619 return std::make_pair(0U, &X86::VK16WMRegClass);
60621 if (Subtarget.hasBWI()) {
60622 if (VT == MVT::v32i1 || VT == MVT::i32)
60623 return std::make_pair(0U, &X86::VK32WMRegClass);
60624 if (VT == MVT::v64i1 || VT == MVT::i64)
60625 return std::make_pair(0U, &X86::VK64WMRegClass);
60629 } else if (Constraint.size() == 2 && Constraint[0] == 'j') {
60630 switch (Constraint[1]) {
60634 if (VT == MVT::i8 || VT == MVT::i1)
60635 return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
60636 if (VT == MVT::i16)
60637 return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
60638 if (VT == MVT::i32 || VT == MVT::f32)
60639 return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
60640 if (VT != MVT::f80 && !VT.isVector())
60641 return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
60644 if (VT == MVT::i8 || VT == MVT::i1)
60645 return std::make_pair(0U, &X86::GR8RegClass);
60646 if (VT == MVT::i16)
60647 return std::make_pair(0U, &X86::GR16RegClass);
60648 if (VT == MVT::i32 || VT == MVT::f32)
60649 return std::make_pair(0U, &X86::GR32RegClass);
60650 if (VT != MVT::f80 && !VT.isVector())
60651 return std::make_pair(0U, &X86::GR64RegClass);
60657 return std::make_pair(0U, &X86::GR32RegClass);
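Each constraint letter handled above resolves to a register class: 'x' and 'v' pick SSE/AVX registers (with 'v' extending to the EVEX-only XMM16-XMM31 when AVX-512/VLX is available), 'q' and 'Q' restrict to the legacy byte-addressable general-purpose registers, and the 'Yk'/'k' family selects AVX-512 mask registers. A small hedged illustration of the 'x' constraint:

// Illustration of the 'x' constraint: both operands must live in SSE registers,
// so getRegForInlineAsmConstraint hands back FR32/VR128 (or the wider X classes
// when 'v' is used on an AVX-512VL target).
static inline float add_in_sse(float a, float b) {
  __asm__("addss %1, %0" : "+x"(a) : "x"(b));
  return a;
}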
60661 std::pair<Register, const TargetRegisterClass*> Res;
60668 if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
60670 if (Constraint.size() == 7 && Constraint[0] == '{' &&
60671 tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
60672 Constraint[3] == '(' &&
60673 (Constraint[4] >= '0' && Constraint[4] <= '7') &&
60674 Constraint[5] == ')' && Constraint[6] == '}') {
60677 if (Constraint[4] == '7')
60678 return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
60679 return std::make_pair(X86::FP0 + Constraint[4] - '0',
60680 &X86::RFP80RegClass);
60684 if (StringRef("{st}").equals_insensitive(Constraint))
60685 return std::make_pair(X86::FP0, &X86::RFP80RegClass);
60689 if (StringRef("{flags}").equals_insensitive(Constraint))
60690 return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
60694 if (StringRef("{dirflag}").equals_insensitive(Constraint) &&
60696 return std::make_pair(X86::DF, &X86::DFCCRRegClass);
60700 if (StringRef("{fpsr}").equals_insensitive(Constraint) && VT == MVT::Other)
60701 return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
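Explicit physical-register names in constraint strings are resolved here: '{st}' and '{st(0)}'..'{st(7)}' map onto the x87 RFP80 classes, '{flags}' onto EFLAGS, '{dirflag}' onto DF and '{fpsr}' onto FPSW, which is how the usual clang clobber list '~{dirflag},~{fpsr},~{flags}' is matched. A hedged example that names the x87 stack and the condition codes in a clobber list:

// Illustration only: "fninit" resets the x87 unit, so the asm declares the whole
// x87 register stack and the flags as clobbered; each name below goes through the
// explicit-register handling shown above.
static inline void reset_x87(void) {
  __asm__ volatile("fninit" ::: "st", "st(1)", "st(2)", "st(3)",
                   "st(4)", "st(5)", "st(6)", "st(7)", "cc");
}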
60707 if (!Subtarget.is64Bit() &&
60709 TRI->getEncodingValue(Res.first) >= 8) {
60711 return std::make_pair(0, nullptr);
60716 TRI->getEncodingValue(Res.first) & 0x10) {
60718 return std::make_pair(0, nullptr);
60725 if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
60740 return std::make_pair(0, nullptr);
60743 bool is64Bit = Subtarget.is64Bit();
60745 Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
60746 : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
60747 : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
60748 : (is64Bit ? &X86::GR64RegClass : nullptr);
60754 return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
60756 return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
60758 return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
60760 return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
60762 return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
60764 return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
60766 return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
60768 return std::make_pair(0, nullptr);
60772 return std::make_pair(DestReg, RC);
60776 return std::make_pair(0, nullptr);
60784 if (VT == MVT::f16)
60785 Res.second = &X86::FR16XRegClass;
60786 else if (VT == MVT::f32 || VT == MVT::i32)
60787 Res.second = &X86::FR32XRegClass;
60788 else if (VT == MVT::f64 || VT == MVT::i64)
60789 Res.second = &X86::FR64XRegClass;
60790 else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
60791 Res.second = &X86::VR128XRegClass;
60792 else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
60793 Res.second = &X86::VR256XRegClass;
60794 else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
60795 Res.second = &X86::VR512RegClass;
60799 Res.second = nullptr;
60802 if (VT == MVT::v1i1 || VT == MVT::i1)
60803 Res.second = &X86::VK1RegClass;
60804 else if (VT == MVT::v8i1 || VT == MVT::i8)
60805 Res.second = &X86::VK8RegClass;
60806 else if (VT == MVT::v16i1 || VT == MVT::i16)
60807 Res.second = &X86::VK16RegClass;
60808 else if (VT == MVT::v32i1 || VT == MVT::i32)
60809 Res.second = &X86::VK32RegClass;
60810 else if (VT == MVT::v64i1 || VT == MVT::i64)
60811 Res.second = &X86::VK64RegClass;
60815 Res.second = nullptr;
60830 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
60835 if (!Subtarget.is64Bit())
60844void X86TargetLowering::insertCopiesSplitCSR(
60848 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
60858 RC = &X86::GR64RegClass;
60869 Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
60870 "Function should be nounwind in insertCopiesSplitCSR!");
60871 Entry->addLiveIn(*I);
60876 for (auto *Exit : Exits)
60878 TII->get(TargetOpcode::COPY), *I)
60884 return Subtarget.is64Bit();
60892 "Invalid call instruction for a KCFI check");
60897 switch (MBBI->getOpcode()) {
60899 case X86::CALL64m_NT:
60900 case X86::TAILJMPm64:
60901 case X86::TAILJMPm64_REX: {
60904 if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, true,
60907 for (auto *NewMI : NewMIs)
60910 "Unexpected instruction after memory operand unfolding");
60911 if (OrigCall->shouldUpdateAdditionalCallInfo())
60913 MBBI->setCFIType(MF, OrigCall->getCFIType());
60923 switch (MBBI->getOpcode()) {
60925 case X86::CALL64r_NT:
60926 case X86::TAILJMPr64:
60927 case X86::TAILJMPr64_REX:
60928 assert(Target.isReg() && "Unexpected target operand for an indirect call");
60929 Target.setIsRenamable(false);
60930 TargetReg = Target.getReg();
60932 case X86::CALL64pcrel32:
60933 case X86::TAILJMPd64:
60934 assert(Target.isSymbol() && "Unexpected target operand for a direct call");
60938 "Unexpected register for an indirect thunk call");
60939 TargetReg = X86::R11;
60993 if (Subtarget.is64Bit())
61007 if (ML && ML->isInnermost() &&
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineOrXorWithSETCC(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue combineRedundantDWordShuffle(SDValue N, MutableArrayRef< int > Mask, const SDLoc &DL, SelectionDAG &DAG)
Search for a combinable shuffle across a chain ending in pshufd.
static SDValue getBMIMatchingOp(unsigned Opc, SelectionDAG &DAG, SDValue OpMustEq, SDValue Op, unsigned Depth)
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
Handle vector element shifts where the shift amount is a constant.
static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS, bool PackHiHalf=false)
Returns a node that packs the LHS + RHS nodes together at half width.
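A scalar sketch of the 128-bit PACKSSWB-style packing this builds on (illustrative model only; the helper name packsswb is hypothetical, and wider vectors apply the same pattern per 128-bit lane):

    #include <algorithm>
    #include <array>
    #include <cstdint>

    // Pack two vectors of i16 into one vector of i8 with signed saturation:
    // the low 8 result bytes come from L, the high 8 from R.
    static std::array<int8_t, 16> packsswb(const std::array<int16_t, 8> &L,
                                           const std::array<int16_t, 8> &R) {
      std::array<int8_t, 16> Out;
      for (int I = 0; I != 8; ++I) {
        Out[I] = int8_t(std::clamp<int16_t>(L[I], INT8_MIN, INT8_MAX));
        Out[I + 8] = int8_t(std::clamp<int16_t>(R[I], INT8_MIN, INT8_MAX));
      }
      return Out;
    }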
static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG)
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef< int > Mask, bool AllowFloatDomain, bool AllowIntDomain, SDValue V1, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT)
static bool isConstantPowerOf2(SDValue V, unsigned EltSizeInBIts, bool AllowUndefs)
static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Given a scalar cast to FP with a cast to integer operand (almost an ftrunc), try to vectorize the cas...
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static bool getHalfShuffleMask(ArrayRef< int > Mask, MutableArrayRef< int > HalfMask, int &HalfIdx1, int &HalfIdx2)
If the input shuffle mask results in a vector that is undefined in all upper or lower half elements a...
static cl::opt< int > BrMergingBaseCostThresh("x86-br-merging-base-cost", cl::init(2), cl::desc("Sets the cost threshold for when multiple conditionals will be merged " "into one branch versus be split in multiple branches. Merging " "conditionals saves branches at the cost of additional instructions. " "This value sets the instruction cost limit, below which conditionals " "will be merged, and above which conditionals will be split. Set to -1 " "to never merge branches."), cl::Hidden)
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, SmallVectorImpl< int > &Mask, SmallVectorImpl< SDValue > &Ops, const SelectionDAG &DAG, unsigned Depth, bool ResolveKnownElts)
static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT)
static SDValue emitLockedStackOp(SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue Chain, const SDLoc &DL)
Emit a locked operation on a stack location which does not change any memory location,...
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, bool &ForceV1Zero, bool &ForceV2Zero, unsigned &ShuffleImm, ArrayRef< int > Mask, const APInt &Zeroable)
static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower 8-lane 16-bit floating point shuffles.
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
Try to emit a blend instruction for a shuffle using bit math.
static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If exactly one element of the mask is set for a non-extending masked load, it is a scalar load and ve...
static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, unsigned TargetOpcode, unsigned SrcReg, const X86Subtarget &Subtarget, SmallVectorImpl< SDValue > &Results)
Handles the lowering of builtin intrinsics with a chain that return their value in registers EDX:EAX.
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef< int > Mask, const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm)
static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI)
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If this is a dynamic select (non-constant condition) and we can match this node with one of the varia...
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBuildVectorAsInsert(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, unsigned EltSizeInBits, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a target shuffle mask is equivalent within each sub-lane.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Try to map a 128-bit or larger integer comparison to vector instructions before type legalization spl...
static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits, unsigned ScalarSizeInBits, ArrayRef< int > Mask)
Test whether there are elements crossing LaneSizeInBits lanes in this shuffle mask.
static SDValue FixupMMXIntrinsicTypes(SDNode *N, SelectionDAG &DAG)
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, X86::CondCode &X86CC)
Result of 'and' is compared against zero.
static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsZeroOrAnyExtend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a zero extension on any microarch.
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBitOpWithShift(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Compute the horizontal sum of bytes in V for the elements of VT.
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned Depth=0)
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG)
static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG)
static SDValue lowerShuffleWithEXPAND(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static void computeInLaneShuffleMask(const ArrayRef< int > &Mask, int LaneSize, SmallVector< int > &InLaneMask)
Helper to compute the in-lane shuffle mask for a complete shuffle mask.
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG)
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineTESTP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT, EVT MemVT, MemSDNode *Mem, unsigned Offset, SelectionDAG &DAG)
static bool isUndefUpperHalf(ArrayRef< int > Mask)
Return true if the mask creates a vector whose upper half is undefined.
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt=0)
static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG)
Lower SRA_PARTS and friends, which return two i32 values and take a 2 x i32 value to shift plus a shi...
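A hedged scalar model of the parts shift being lowered, with SRA shown and a 64-bit value split into i32 halves (illustrative only, not the emitted DAG; the function name sraParts is made up for this sketch):

    #include <cstdint>

    // Arithmetic right shift of a 64-bit value given as {Lo, Hi} i32 parts.
    static void sraParts(uint32_t Lo, int32_t Hi, unsigned Amt,
                         uint32_t &OutLo, int32_t &OutHi) {
      Amt &= 63;
      if (Amt == 0) {
        OutLo = Lo;
        OutHi = Hi;
      } else if (Amt < 32) {
        OutLo = (Lo >> Amt) | (uint32_t(Hi) << (32 - Amt));
        OutHi = Hi >> Amt;            // arithmetic shift preserves the sign
      } else {
        OutLo = uint32_t(Hi >> (Amt - 32));
        OutHi = Hi >> 31;             // all copies of the sign bit
      }
    }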
static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode)
static std::pair< SDValue, SDValue > getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG)
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs reference the same FP CMP,...
static bool isVKClass(const TargetRegisterClass &RC)
Check if RC is a mask register class.
static int canLowerByDroppingElements(ArrayRef< int > Mask, bool MatchEven, bool IsSingleInput)
Check whether a compaction lowering can be done by dropping even/odd elements and compute how many ti...
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL)
Attempt to pre-truncate inputs to arithmetic ops if it will simplify the codegen.
static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower broadcast of a single element.
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl< SDValue > &Inputs, SmallVectorImpl< int > &Mask)
Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue combineBitOpWithMOVMSK(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to combine a shuffle into a target-specific add-sub or mul-add-sub node.
static SDValue lowerShuffleAsLanePermuteAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower a vector shuffle crossing multiple 128-bit lanes as a lane permutation followed by a per-lane p...
static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic lowering of 8-lane i16 shuffles.
static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue In, SelectionDAG &DAG)
static bool canonicalizeShuffleMaskWithCommute(ArrayRef< int > Mask)
Helper function that returns true if the shuffle mask should be commuted to improve canonicalization.
static bool matchAsm(StringRef S, ArrayRef< const char * > Pieces)
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, const SDLoc &DL, SelectionDAG &DAG)
static SDValue splitVSETCC(EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SelectionDAG &DAG, const SDLoc &dl)
Break a 256/512-bit VSETCC into two new 128/256-bit ones and then concatenate the result back.
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG)
Change a vector store into a pair of half-size vector stores.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Widen a vector to a larger size with the same scalar type, with the new elements either zero or undef...
static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static bool isUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size is the undef...
static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG, X86::CondCode &X86CC)
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit, bool isPIC)
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, SDValue And1_L, SDValue And1_R, const SDLoc &DL, SelectionDAG &DAG)
static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineToExtendBoolVectorInReg(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break a binary integer operation into two half-sized ops and then concatenate the result back.
static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static LLVM_ATTRIBUTE_UNUSED bool isBlendOrUndef(ArrayRef< int > Mask)
Return true if every element in Mask is an in-place blend/select mask or is undef.
static const char * getIndirectThunkSymbol(const X86Subtarget &Subtarget, unsigned Reg)
static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static unsigned getV4X86ShuffleImm(ArrayRef< int > Mask)
Get a 4-lane 8-bit shuffle immediate for a mask.
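A sketch of the standard 2-bits-per-lane immediate encoding (PSHUFD/SHUFPS style) such a helper produces; an in-range mask is assumed and the name shuffleImm8 is illustrative:

    #include <cassert>

    // Element I of the result is taken from source lane Mask[I]; that lane
    // index is stored in bits [2*I+1 : 2*I] of the 8-bit immediate.
    static unsigned shuffleImm8(const int (&Mask)[4]) {
      unsigned Imm = 0;
      for (int I = 0; I != 4; ++I) {
        assert(Mask[I] >= 0 && Mask[I] < 4 && "expected an in-range 4-lane mask");
        Imm |= unsigned(Mask[I]) << (2 * I);
      }
      return Imm;
    }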
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static void resolveTargetShuffleFromZeroables(SmallVectorImpl< int > &Mask, const APInt &KnownUndef, const APInt &KnownZero, bool ResolveKnownZeros=true)
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Insert one bit into a mask vector, like v16i1 or v8i1.
static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower a vector shuffle by first fixing the 128-bit lanes and then shuffling each lane.
static bool isSoftF16(T VT, const X86Subtarget &Subtarget)
static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Detect vector gather/scatter index generation and convert it from being a bunch of shuffles and extra...
static bool isSingleSHUFPSMask(ArrayRef< int > Mask)
Test whether this can be lowered with a single SHUFPS instruction.
static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd)
Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
static bool isX86LogicalCmp(SDValue Op)
Return true if opcode is a X86 logical comparison.
static bool isAnyInRange(ArrayRef< int > Mask, int Low, int Hi)
Return true if the value of any element in Mask falls within the specified range (L,...
static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static cl::opt< bool > WidenShift("x86-widen-shift", cl::init(true), cl::desc("Replace narrow shifts with wider shifts."), cl::Hidden)
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG)
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, unsigned Depth)
static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS=false)
Detect patterns of truncation with signed saturation: (truncate (smin ((smax (x, signed_min_of_dest_t...
const unsigned FPStateSize
static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, unsigned &UnpackOpcode, bool IsUnary, ArrayRef< int > TargetMask, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on floating point negations.
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineXorSubCTLZ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl, unsigned vectorWidth)
static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned &HOpcode, SDValue &V0, SDValue &V1)
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool createShuffleMaskFromVSELECT(SmallVectorImpl< int > &Mask, SDValue Cond, bool IsBLENDV=false)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, bool AllowTruncate)
static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Helper to determine if In truncated to DstVT has the necessary signbits / leading zero bits to be tru...
static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Return Mask with the necessary casting or extending for Mask according to MaskVT when lowering maskin...
static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 64-bit floating point shuffles.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Horizontal vector math instructions may be slower than normal math with shuffles.
static bool isFRClass(const TargetRegisterClass &RC)
Check if RC is a vector register class.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, bool SimpleOnly)
Generic routine to split vector shuffle into half-sized shuffles.
static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue IsNOT(SDValue V, SelectionDAG &DAG)
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG)
Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Emit nodes that will be selected as "test Op0,Op0", or something equivalent.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &dl)
Return a vector logical shift node.
static bool isFreeToSplitVector(SDNode *N, SelectionDAG &DAG)
static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineINTRINSIC_WO_CHAIN(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower 4-lane i32 vector shuffles.
static SDValue combineX86ShuffleChain(ArrayRef< SDValue > Inputs, SDValue Root, ArrayRef< int > BaseMask, int Depth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Combine an arbitrary chain of shuffles into a single instruction if possible.
static SDValue widenMaskVector(SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Widen a mask vector to a minimum of v8i1/v16i1 to allow use of KSHIFT and bitcast with integer types.
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, SelectionDAG &DAG)
static bool isInRange(int Val, int Low, int Hi)
Return true if Val falls within the specified range (L, H].
static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Try to combine x86 target specific shuffles.
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > splitVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG)
Helper for attempting to create an X86ISD::BT node.
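For reference, a scalar model of the bit test a BT node performs on a register operand, where the bit index wraps at the operand width (illustrative only; bitTest64 is a made-up name):

    #include <cstdint>

    // BT r64, r64: CF receives bit (BitNo mod 64) of Src.
    static bool bitTest64(uint64_t Src, unsigned BitNo) {
      return (Src >> (BitNo & 63)) & 1;
    }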
static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG)
Emit Truncating Store with signed or unsigned saturation.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, bool FillWithZeroes=false)
Widen a vector input to a vector of NVT.
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG)
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, bool ImmBlends=false)
Try to lower as a blend of elements from two inputs followed by a single-input permutation.
static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask, uint64_t &BitLen, uint64_t &BitIdx, const APInt &Zeroable)
const unsigned X87StateSize
static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 64-bit integer shuffles.
static bool isLegalConversion(MVT VT, bool IsSigned, const X86Subtarget &Subtarget)
static bool isUndefOrEqual(int Val, int CmpVal)
Return true if Val is the undef sentinel value or equal to the specified value.
static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool isTargetShuffle(unsigned Opcode)
static bool isSingleElementRepeatedMask(ArrayRef< int > Mask)
Check if the Mask consists of the same element repeated multiple times.
static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG)
static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerIntVSETCC_AVX512(SDValue Op, const SDLoc &dl, SelectionDAG &DAG)
static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG)
Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, SDValue N1, ArrayRef< int > Mask, SelectionDAG &DAG)
If we are extracting two 128-bit halves of a vector and shuffling the result, match that to a 256-bit...
static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 4-lane 64-bit floating point shuffles.
static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT, ArrayRef< SDValue > Ops, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' or 'fsubadd' operation accordingly...
static SDValue lowerV8I16GeneralSingleInputShuffle(const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 shuffle lowering,...
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG)
Try to turn tests against the signbit in the form of: XOR(TRUNCATE(SRL(X, size(X)-1)),...
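For intuition, a C-level analogue of this kind of sign-bit test (not the exact DAG pattern from the full comment; the function names are illustrative): extracting the sign bit and xor-ing it with 1 is the same as a signed greater-than-minus-one compare.

    #include <cstdint>

    static bool before(int32_t X) { return ((uint32_t(X) >> 31) ^ 1) != 0; }
    static bool after(int32_t X) { return X > -1; } // i.e. X is non-negative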
static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef< SDValue > Ops, ArrayRef< int > Mask, bool HasVariableMask, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit floating point shuffles.
static SDValue isUpperSubvectorUndef(SDValue V, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< int > BrMergingLikelyBias("x86-br-merging-likely-bias", cl::init(0), cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely " "that all conditionals will be executed. For example for merging " "the conditionals (a == b && c > d), if its known that a == b is " "likely, then it is likely that if the conditionals are split " "both sides will be executed, so it may be desirable to increase " "the instruction cost threshold. Set to -1 to never merge likely " "branches."), cl::Hidden)
static bool clobbersFlagRegisters(const SmallVector< StringRef, 4 > &AsmPieces)
static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG)
static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp, int Idx, int ExpectedIdx)
Checks whether the vector elements referenced by two shuffle masks are equivalent.
static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Try to match a vector shuffle as an element rotation.
static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi)
Return true if Val is undef, zero or if its value falls within the specified range (L,...
static const Constant * getTargetConstantFromBasePtr(SDValue Ptr)
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Original, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to emit a blend instruction for a shuffle.
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset)
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ArrayRef< SDValue > Ops, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Helper that combines an array of subvector ops as if they were the operands of a ISD::CONCAT_VECTORS ...
static bool isUndefOrInRange(int Val, int Low, int Hi)
Return true if Val is undef or if its value falls within the specified range (L, H].
static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, const SDLoc &DL, EVT VT)
static bool collectConcatOps(SDNode *N, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG)
static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Helper to recursively truncate vector elements in half with PACKSS/PACKUS.
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG)
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG)
static void computeKnownBitsForPMADDUBSW(SDValue LHS, SDValue RHS, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth)
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static std::pair< Value *, BitTestKind > FindSingleBitChange(Value *V)
static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG)
If we are converting a value to floating-point, try to replace scalar truncate of an extracted vector...
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef< int > Mask)
Test whether there are elements crossing 128-bit lanes in this shuffle mask.
static SDValue LowerI64IntToFP16(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 4-lane 64-bit integer shuffles.
static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Emit nodes that will be selected as "cmp Op0,Op1", or something equivalent.
static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG)
const unsigned FPStateSizeInBits
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If exactly one element of the mask is set for a non-truncating masked store, it is a vector extract a...
static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode)
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue narrowExtractedVectorSelect(SDNode *Ext, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector of a vector select and the select condition is composed of concatena...
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isNoopShuffleMask(ArrayRef< int > Mask)
Tiny helper function to identify a no-op mask.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue V1, SDValue V2)
Returns a vector_shuffle node for an unpackh operation.
static SDValue combineExtractFromVectorLoad(SDNode *N, EVT VecVT, SDValue SrcVec, uint64_t Idx, const SDLoc &dl, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If this is a zero/all-bits result that is bitwise-anded with a low bits mask.
static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a byte shift sequence.
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineX86ShuffleChainWithExtract(ArrayRef< SDValue > Inputs, SDValue Root, ArrayRef< int > BaseMask, int Depth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isTargetShuffleVariableMask(unsigned Opcode)
static bool isLogicOp(unsigned Opcode)
static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG, bool BitwiseOnly)
static SDValue LowerBuildVectorv8i16(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v8i16.
static bool matchBinaryShuffle(MVT MaskVT, ArrayRef< int > Mask, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, bool IsUnary)
static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
Try to lower as an unpack of elements from two inputs followed by a single-input permutation.
static bool canScaleShuffleElements(ArrayRef< int > Mask, unsigned NumDstElts)
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG)
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Return a vector_shuffle of the specified vector and a zero or undef vector.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Attempt to use the vbroadcast instruction to generate a splat value from a splat BUILD_VECTOR which u...
static SDValue combineMulToPMULDQ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG)
Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z)). This undoes the inverse fold performed in InstCom...
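Both sides of the fold compute the same value; by De Morgan, ~(~Y & Z) == (Y | ~Z). A scalar check, for illustration (the function names are made up):

    static unsigned lhsForm(unsigned X, unsigned Y, unsigned Z) { return X & (Y | ~Z); }
    static unsigned rhsForm(unsigned X, unsigned Y, unsigned Z) { return X & ~(~Y & Z); }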
static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG)
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool IsCommutative, SmallVectorImpl< int > &PostShuffleMask, bool ForceHorizOp)
Return 'true' if this vector operation is "horizontal" and return the operands for the horizontal ope...
static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, SmallVectorImpl< uint64_t > &RawMask, APInt &UndefElts)
static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, const X86Subtarget &Subtarget)
sext(add_nsw(x, C)) --> add(sext(x), C_sext); zext(add_nuw(x, C)) --> add(zext(x), C_zext). Promoting a...
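An illustration of the nsw case for i8 -> i32, assuming the narrow add does not overflow (which is what the nsw flag guarantees); the helper names are hypothetical:

    #include <cstdint>

    static int32_t addThenSext(int8_t X, int8_t C) {
      return int32_t(int8_t(X + C));          // only valid under nsw
    }
    static int32_t sextThenAdd(int8_t X, int8_t C) {
      return int32_t(X) + int32_t(C);         // the promoted form
    }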
static const Constant * getTargetConstantFromNode(LoadSDNode *Load)
static SDValue canonicalizeBitSelect(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool canCombineAsMaskOperation(SDValue V, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a dword/qword rotation.
static SDValue lowerVECTOR_COMPRESS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool isProfitableToUseFlagOp(SDValue Op)
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG)
ISD::FROUND is defined to round to nearest with ties rounding away from 0.
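This matches C's round()/roundf() (e.g. roundf(2.5f) == 3.0f, roundf(-2.5f) == -3.0f), unlike rint()/nearbyint(), which round ties to even. A one-line model (froundModel is an illustrative name):

    #include <cmath>

    static float froundModel(float X) { return std::round(X); } // ties away from zero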
static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
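For example, the unsigned-saturation idiom for an i16 -> i8 truncation amounts to clamping to the destination's unsigned maximum before truncating (a minimal sketch, not the matcher itself; the helper name is made up):

    #include <algorithm>
    #include <cstdint>

    static uint8_t truncUSat16to8(uint16_t X) {
      return uint8_t(std::min<uint16_t>(X, 255)); // umin with UINT8_MAX, then truncate
    }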
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the low half of each source v...
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG)
Do a one-to-one translation of an ISD::CondCode to the X86-specific condition code,...
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable, const X86Subtarget &Subtarget)
Try to lower a vector shuffle as a bit shift (shifts in zeros).
static SDValue getFlagsOfCmpZeroFori1(SelectionDAG &DAG, const SDLoc &DL, SDValue Mask)
static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
High-level routine to lower various 512-bit x86 vector shuffles.
static SDValue LowerBuildVectorv16i8(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v16i8.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt &UndefElts, SmallVectorImpl< APInt > &EltBits, bool AllowWholeUndefs=true, bool AllowPartialUndefs=false)
static bool detectExtMul(SelectionDAG &DAG, const SDValue &Mul, SDValue &Op0, SDValue &Op1)
static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If a vector select has an operand that is -1 or 0, try to simplify the select to a bitwise logic oper...
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Depending on uarch and/or optimizing for size, we might prefer to use a vector operation in place of ...
static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, SelectionDAG &DAG, SDValue &Addr, SDValue &Index, Align &Alignment, unsigned &Offset)
Given a masked memory load/store operation, return true if it has one mask bit set.
static SDValue reduceVMULWidth(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
When the operands of vector mul are extended from smaller size values, like i8 and i16,...
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode)
static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG)
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, unsigned ExpectedUses)
Returns true if it is possible to fold MUL and an idiom that has already been recognized as ADDSUB/SUBAD...
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS, unsigned &LogBias, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute)
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG)
The only differences between FABS and FNEG are the mask and the logic op.
ShrinkMode
Different mul shrinking modes.
static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, const SDLoc &dl)
static SDValue combineINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue canonicalizeShuffleMaskWithHorizOp(MutableArrayRef< SDValue > Ops, MutableArrayRef< int > Mask, unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG)
Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a VSELECT instruction to a vector shuffle.
static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2, MutableArrayRef< int > Mask, const APInt &Zeroable, bool &ForceV1Zero, bool &ForceV2Zero, uint64_t &BlendMask)
static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src, const SDLoc &DL)
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, EVT VT, const SDLoc &DL)
static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue V1, SDValue V2)
Returns a vector_shuffle node for an unpackl operation.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx, SelectionDAG &DAG)
Try to get a scalar value for a specific element of a vector.
static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc)
static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic lowering of v16i8 shuffles.
static unsigned getSHUFPDImm(ArrayRef< int > Mask)
static bool isNullFPScalarOrVectorConst(SDValue V)
static bool hasIdenticalHalvesShuffleMask(ArrayRef< int > Mask)
Return true if a shuffle mask chooses elements identically in its top and bottom halves.
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, unsigned &PackOpcode, ArrayRef< int > TargetMask, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned MaxStages=1)
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget)
static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Try to convert a vector reduction sequence composed of binops and shuffles into horizontal ops.
static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower shuffle using X86ISD::VROTLI rotations.
static SDValue lowerShuffleAsDecomposedShuffleMerge(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic routine to decompose a shuffle and blend into independent blends and permutes.
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT)
static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef< int > BlendMask, const APInt &DemandedElts, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL)
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Combine: (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S) to: (brcond/cmov/setcc ....
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Optimize an EFLAGS definition used according to the condition code CC into a simpler EFLAGS value,...
static bool isBroadcastShuffleMask(ArrayRef< int > Mask)
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineX86ShufflesRecursively(ArrayRef< SDValue > SrcOps, int SrcOpIndex, SDValue Root, ArrayRef< int > RootMask, ArrayRef< const SDNode * > SrcNodes, unsigned Depth, unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Fully generic combining of x86 shuffle instructions.
static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static StringRef getInstrStrFromOpNo(const SmallVectorImpl< StringRef > &AsmStrs, unsigned OpNo)
static bool isSequentialOrUndefOrZeroInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size,...
static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Either split a vector in halves or decompose the shuffles and the blend/unpack.
static bool canWidenShuffleElements(ArrayRef< int > Mask, SmallVectorImpl< int > &WidenedMask)
Helper function to test whether a shuffle mask could be simplified by widening the elements being shu...
static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break a unary integer operation into two half-sized ops and then concatenate the result back.
static SDValue combineSext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit integer shuffles.
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineLogicBlendIntoConditionalNegate(EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, SelectionDAG &DAG, unsigned Depth)
Returns the scalar element that will make up the i'th element of the result of the vector shuffle.
static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable)
static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG)
Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
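A C-level analogue of the fold: xor-ing a boolean compare with 1 is the same compare with the inverted condition (the function names are illustrative).

    static bool beforeFold(int A, int B) { return (A < B) ^ 1; }
    static bool afterFold(int A, int B) { return A >= B; }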
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, unsigned &InsertPSMask, const APInt &Zeroable, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef< int > Mask, const EVT &VectorType, bool &IsZeroSideLeft)
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, EVT VecVT, EVT CmpVT, bool HasPT, F SToV)
Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp expansion.
static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just pre-promote its result type since...
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Try to lower a vector shuffle as a byte rotation.
static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a shuffle as a permute of the inputs followed by an UNPCK instruction.
static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, SelectionDAG &DAG)
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget, TargetLowering::DAGCombinerInfo &DCI)
Extracting a scalar FP value from vector element 0 is free, so extract each operand first,...
static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isAddSubOrSubAddMask(ArrayRef< int > Mask, bool &Op0Even)
Checks if the shuffle mask takes subsequent elements alternately from two vectors.
static bool isCompletePermute(ArrayRef< int > Mask)
Return true if every element of a single input is referenced by the shuffle mask.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDValue EntryEBP)
When the MSVC runtime transfers control to us, either to an outlined function or when returning to a ...
static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl< SDValue > &Results)
Handles the lowering of builtin intrinsics that read the time stamp counter (x86_rdtsc and x86_rdtscp).
static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVectorAllEqual(const SDLoc &DL, SDValue LHS, SDValue RHS, ISD::CondCode CC, const APInt &OriginalMask, const X86Subtarget &Subtarget, SelectionDAG &DAG, X86::CondCode &X86CC)
static bool is128BitUnpackShuffleMask(ArrayRef< int > Mask, const SelectionDAG &DAG)
static bool isOrXorXorTree(SDValue X, bool Root=true)
Recursive helper for combineVectorSizedSetCCEquality() to see if we have a recognizable memcmp expansion.
static SDValue LowerAVXExtend(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static ConstantPoolSDNode * getTargetConstantPoolFromBasePtr(SDValue Ptr)
static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, SelectionDAG &DAG, const SDLoc &DL)
Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
static bool isShuffleEquivalent(ArrayRef< int > Mask, ArrayRef< int > ExpectedMask, SDValue V1=SDValue(), SDValue V2=SDValue())
Checks whether a shuffle mask is equivalent to an explicit list of arguments.
static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 32-bit floating point shuffles.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then permuting the elements of th...
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src, const X86Subtarget &Subtarget, SelectionDAG &DAG, bool ZeroUppers)
static void createPackShuffleMask(MVT VT, SmallVectorImpl< int > &Mask, bool Unary, unsigned NumStages=1)
Create a shuffle mask that matches the PACKSS/PACKUS truncation.
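To see the element order such a mask has to reproduce, the 128-bit case can be checked directly with the PACKUSWB intrinsic (illustrative only): A's eight saturated lanes occupy the low bytes and B's the high bytes.

#include <immintrin.h>
#include <cassert>
// One 128-bit PACKUS step: saturate each i16 lane to u8 and concatenate
// A's 8 bytes followed by B's 8 bytes.
int main() {
  __m128i a = _mm_setr_epi16(0, 1, 2, 300, -5, 6, 7, 8);
  __m128i b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, _mm_packus_epi16(a, b));
  assert(out[0] == 0 && out[3] == 255 && out[4] == 0); // 300 and -5 saturate
  assert(out[8] == 9 && out[15] == 16);                // B fills bytes 8..15
  return 0;
}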
static bool isUndefOrEqualInRange(ArrayRef< int > Mask, int CmpVal, unsigned Pos, unsigned Size)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size is the undef...
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on floating-point adds/subs.
static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break an operation into 2 half sized ops and then concatenate the results.
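The split-then-concat shape can be sketched with SelectionDAG helpers documented further down in this reference (GetSplitDestVTs, SplitVectorOperand, getNode). This is a minimal illustration for a single-operand vector node, not the in-tree implementation.

#include "llvm/CodeGen/SelectionDAG.h"
#include <tuple>
using namespace llvm;

// Split a unary vector op into two half-width ops and concatenate the halves.
static SDValue splitUnaryVectorOpSketch(SDValue Op, SelectionDAG &DAG,
                                        const SDLoc &dl) {
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(Op.getValueType());
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
  Lo = DAG.getNode(Op.getOpcode(), dl, LoVT, Lo);
  Hi = DAG.getNode(Op.getOpcode(), dl, HiVT, Hi);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, Op.getValueType(), Lo, Hi);
}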
static cl::opt< bool > MulConstantOptimization("mul-constant-optimization", cl::init(true), cl::desc("Replace 'mul x, Const' with more effective instructions like " "SHIFT, LEA, etc."), cl::Hidden)
static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld)
static bool isAnyZero(ArrayRef< int > Mask)
Return true if the value of any element in Mask is the zero sentinel value.
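A sketch of the scan, assuming the zero sentinel is one of the negative marker values used by the X86 shuffle-decode helpers (passed in here as ZeroSentinel rather than named directly):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
// Returns true if any mask element equals the given zero sentinel.
static bool isAnyZeroSketch(llvm::ArrayRef<int> Mask, int ZeroSentinel) {
  return llvm::any_of(Mask, [&](int M) { return M == ZeroSentinel; });
}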
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Truncate using inreg zero extension (AND mask) and X86ISD::PACKUS.
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl< int > &Mask, APInt &KnownUndef, APInt &KnownZero)
static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, SDValue Index, SDValue Base, SDValue Scale, SelectionDAG &DAG)
static SDValue combineSubABS(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SmallVector< int, 4 > getPSHUFShuffleMask(SDValue N)
Get the PSHUF-style mask from PSHUF node.
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineShuffleToFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Combine shuffle of two fma nodes into FMAddSub or FMSubAdd.
static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isUndefOrZero(int Val)
Val is either the undef or zero sentinel value.
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, F Builder, bool CheckBWI=true)
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL].
static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, MachineBasicBlock *BB)
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl)
Generate a DAG to grab 128-bits from a vector > 128 bits.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue &X86CC)
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT, ArrayRef< int > Mask, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower a vector shuffle using the SHUFPS instruction.
static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static LLVM_ATTRIBUTE_UNUSED bool isHorizOp(unsigned Opcode)
static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower a vector CTLZ using native supported vector CTLZ instruction.
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Extract one bit from mask vector, like v16i1 or v8i1.
static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, MVT VT, bool IsSigned, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue *Low=nullptr)
static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse)
Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the blend if only one input is used.
static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask, uint64_t &BitLen, uint64_t &BitIdx)
static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS, SDValue Mask, SelectionDAG &DAG)
static SDValue combineAVG(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
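A hedged sketch of what such a range check looks like, assuming sentinel (negative) entries act as wildcards and the remaining entries must follow the arithmetic sequence Low, Low + Step, ...; this is the assumed behaviour, not a copy of the in-tree predicate.

#include "llvm/ADT/ArrayRef.h"
// Entries in [Pos, Pos+Size) must be sentinels or match Low, Low+Step, ...
static bool isSequentialOrUndefSketch(llvm::ArrayRef<int> Mask, unsigned Pos,
                                      unsigned Size, int Low, int Step = 1) {
  for (unsigned I = Pos, E = Pos + Size; I != E; ++I, Low += Step)
    if (Mask[I] >= 0 && Mask[I] != Low)
      return false;
  return true;
}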
static cl::opt< int > BrMergingUnlikelyBias("x86-br-merging-unlikely-bias", cl::init(-1), cl::desc("Decreases 'x86-br-merging-base-cost' in cases that it is unlikely " "that all conditionals will be executed. For example for merging " "the conditionals (a == b && c > d), if its known that a == b is " "unlikely, then it is unlikely that if the conditionals are split " "both sides will be executed, so it may be desirable to decrease " "the instruction cost threshold. Set to -1 to never merge unlikely " "branches."), cl::Hidden)
static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, const SDLoc &DL, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl< int > &Mask, SmallVectorImpl< SDValue > &Ops, APInt &KnownUndef, APInt &KnownZero)
Decode a target shuffle mask and inputs and see if any values are known to be undef or zero from their inputs.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v4i32 or v4f32.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef< int > Mask, ArrayRef< int > ExpectedMask, const SelectionDAG &DAG, SDValue V1=SDValue(), SDValue V2=SDValue())
Checks whether a target shuffle mask is equivalent to an explicit pattern.
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG)
Fold "masked merge" expressions like (m & x) | (~m & y) into the equivalent ((x ^ y) & m) ^ y) patter...
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1)
static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
CMOV of constants requires materializing constant operands in registers.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool Is64Bit, bool Is64BitLP64)
static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
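The payoff is that PANDN/ANDNP compute (~X) & Y in a single instruction; the same operation is exposed to C code through _mm_andnot_si128, as this small check shows (illustrative only).

#include <immintrin.h>
#include <cassert>
#include <cstdint>
int main() {
  __m128i x = _mm_set1_epi32(0x0FF00FF0);
  __m128i y = _mm_set1_epi32(0x12345678);
  uint32_t out[4];
  _mm_storeu_si128((__m128i *)out, _mm_andnot_si128(x, y)); // (~x) & y
  assert(out[0] == ((~0x0FF00FF0u) & 0x12345678u));
  return 0;
}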
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx)
For an EXTRACT_VECTOR_ELT with a constant index return the real underlying vector and index.
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUnaryOp(unsigned Opcode)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Optimize branch condition evaluation.
static bool hasFPCMov(unsigned X86CC)
Is there a floating point cmov for the specific X86 condition code? Current x86 isa includes the foll...
static int getOneTrueElt(SDValue V)
If V is a build vector of boolean constants and exactly one of those constants is true,...
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static constexpr int Concat[]
auto IsFreeTruncation
static const unsigned FramePtr
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
static APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
opStatus next(bool nextDown)
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
unsigned popcount() const
Count the number of bits set.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
APInt abs() const
Get the absolute value.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
void setSignBit()
Set the sign bit to 1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are both set.
bool eq(const APInt &RHS) const
Equality comparison.
int32_t exactLogBase2() const
void clearAllBits()
Set every bit to 0.
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
unsigned getNumSignBits() const
Computes the number of leading bits of this APInt that are equal to its sign bit.
unsigned countl_zero() const
The APInt version of std::countl_zero.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void flipAllBits()
Toggle every bit to its opposite value.
unsigned countl_one() const
Count the number of leading one bits.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
void setAllBits()
Set every bit to 1.
bool getBoolValue() const
Convert APInt to a boolean value.
bool isMask(unsigned numBits) const
bool isMaxSignedValue() const
Determine if this is the largest signed value.
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
APInt sext(unsigned width) const
Sign extend to a new width.
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
APInt shl(unsigned shiftAmt) const
Left-shift function.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool isSignBitSet() const
Determine if sign bit of this APInt is set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
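A quick sanity check of several of the APInt helpers listed above (assumes the LLVM ADT headers are available; the values are arbitrary):

#include "llvm/ADT/APInt.h"
#include <cassert>
int main() {
  llvm::APInt V = llvm::APInt::getLowBitsSet(32, 8); // 0x000000FF
  V.setBit(12);                                      // 0x000010FF
  assert(V.extractBits(4, 12).getZExtValue() == 1);  // bits [12,16)
  assert(V.popcount() == 9 && V.countr_zero() == 0);
  assert(llvm::APInt::getSignMask(32).isSignMask());
  return 0;
}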
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
bool isOne() const
Determine if this is a value of 1.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
int64_t getSExtValue() const
Get sign extended value.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
unsigned countr_one() const
Count the number of trailing one bits.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
bool isMaxValue() const
Determine if this is the largest unsigned value.
APInt truncSSat(unsigned width) const
Truncate to new width with signed saturation.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
size_type count() const
count - Returns the number of bits which are set.
bool none() const
none - Returns true if none of the bits are set.
The address of a basic block.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool getRepeatedSequence(const APInt &DemandedElts, SmallVectorImpl< SDValue > &Sequence, BitVector *UndefElements=nullptr) const
Find the shortest repeating sequence of values in the build vector.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
Value * getCalledOperand() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SGT
signed greater than
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matching the ArrayRef passed in.
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
bool isMachineConstantPoolEntry() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if possible, or null if not.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment padding.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Tagged union holding either a T or a Error.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal types or non-trivial lowering, but runs quickly.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionLoweringInfo - This contains information that is global to a function that is used when lowering a region of the function.
Type::subtype_iterator param_iterator
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
bool hasPersonalityFn() const
Check whether this function has a personality function.
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1', drop it.
bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
ThreadLocalMode getThreadLocalMode() const
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
std::vector< ConstraintInfo > ConstraintInfoVector
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
bool usesWindowsCFI() const
MCSymbol * getOrCreateParentFrameOffsetSymbol(const Twine &FuncName)
MCSymbol * getOrCreateLSDASymbol(const Twine &FuncName)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
bool is32BitVector() const
Return true if this is a 32-bit vector type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
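A small, purely illustrative demo of a few of the MVT queries in this list (the MachineValueType.h header path has moved between LLVM releases, so the include shown is only one possibility):

#include "llvm/CodeGen/MachineValueType.h" // path varies by LLVM release
#include <cassert>
int main() {
  llvm::MVT V = llvm::MVT::v8i32;
  assert(V.is256BitVector() && V.getVectorNumElements() == 8);
  assert(V.getScalarSizeInBits() == 32);
  assert(V.getHalfNumVectorElementsVT() == llvm::MVT::v4i32);
  assert(V.changeVectorElementType(llvm::MVT::f32) == llvm::MVT::v8f32);
  return 0;
}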
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks which refer to FromMBB to refer to this.
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned succ_size() const
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Instructions::iterator instr_iterator
succ_reverse_iterator succ_rbegin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
succ_reverse_iterator succ_rend()
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminator, exception-handling target, or jump table.
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setAdjustsStack(bool V)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
void setHasCopyImplyingStackAdjustment(bool B)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
void moveAdditionalCallInfo(const MachineInstr *Old, const MachineInstr *New)
Move the call site info from Old to New call site info.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MGATHER node.
This is a base class used to represent MGATHER and MSCATTER nodes.
This class is used to represent an MLOAD node.
This base class is used to represent MLOAD and MSTORE nodes.
const SDValue & getMask() const
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
This class is used to represent an MSCATTER node.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID for this memory operation.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNode * getGluedUser() const
If this node has a glue value with a user, return the user (there is at most one).
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
static bool areOnlyUsersOf(ArrayRef< const SDNode * > Nodes, const SDNode *N)
Return true if all the users of N are contained in Nodes.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the type of the node type undefined.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
Help to insert SDNodeFlags automatically in transforming.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode)
Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op)
Return the specified value casted to the target's desired shift amount type.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending or truncating it.
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SDValue.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
bool isEqualTo(SDValue A, SDValue B) const
Test whether two SDValues are known to compare equal.
static constexpr unsigned MaxRecursionDepth
SDValue expandVACopy(SDNode *Node)
Expand the specified ISD::VACOPY node as the Legalize pass would.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not necessarily identical pieces.
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
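The scalar identities behind getNOT (listed above) and getNegative, checked on an arbitrary value (illustrative only):

#include <cassert>
#include <cstdint>
int main() {
  uint32_t x = 0xDEADBEEFu;
  assert((x ^ 0xFFFFFFFFu) == ~x); // getNOT: XOR with all-ones
  assert(0u - x == ~x + 1u);       // getNegative: SUB 0, x (two's complement)
  return 0;
}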
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue expandVAArg(SDNode *Node)
Expand the specified ISD::VAARG node as the Legalize pass would.
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false, does not have undef bits.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncating it.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an SDValue.
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by truncation).
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, ArrayRef< ISD::NodeType > CandidateBinOps, bool AllowPartials=false)
Match a binop + shuffle pyramid that represents a horizontal reduction over the elements of a vector ...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
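The SelectionDAG helpers above are typically combined when rewriting nodes. As a hedged illustration (the helper below and its name are assumptions, not code from this file), if SignBitIsZero reports that the sign bit of a value is known to be zero, a sign-extension can be replaced by the cheaper zero-extension:

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Illustrative sketch only: widen V to i64, preferring a zero-extend when
// computeKnownBits (via SignBitIsZero) proves the sign bit is zero.
static SDValue extendToI64(SelectionDAG &DAG, SDValue V, const SDLoc &DL) {
  if (DAG.SignBitIsZero(V))
    return DAG.getZExtOrTrunc(V, DL, MVT::i64);
  return DAG.getSExtOrTrunc(V, DL, MVT::i64);
}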
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
void reserve(size_type NumEntries)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
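A minimal sketch of how these ADT containers are commonly used together (the helper below is illustrative, not part of this file): a SmallVector holds the working list, a SmallSet deduplicates values, and a SmallBitVector records per-element flags.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Count the distinct non-zero values in In, marking the first occurrence of
// each one in a bit vector.
static unsigned countDistinctNonZero(ArrayRef<int> In) {
  SmallVector<int, 8> Vals(In.begin(), In.end()); // stays inline up to 8 elements
  SmallSet<int, 8> Seen;
  SmallBitVector FirstUse(Vals.size());
  for (unsigned I = 0, E = Vals.size(); I != E; ++I)
    if (Vals[I] != 0 && Seen.insert(Vals[I]).second)
      FirstUse[I] = true; // insert() returns {it, true} only for new values
  return FirstUse.count();
}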
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
static constexpr size_t npos
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C, or npos if not found.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
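The StringRef and StringSwitch APIs above are the usual tools for parsing short textual tokens such as inline-asm constraint strings. A hedged sketch follows; the helper name and the token set are illustrative assumptions.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

using namespace llvm;

// Strip optional surrounding braces, then map the token to a small code.
static int classifyToken(StringRef S) {
  if (S.starts_with("{") && S.ends_with("}"))
    S = S.substr(1, S.size() - 2);
  return StringSwitch<int>(S)
      .Case("ax", 0)
      .Case("bx", 1)
      .Case("cx", 2)
      .Default(-1);
}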
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)
Tells the code generator which bitwidths to bypass.
void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum fp convert the backend supports.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save...
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
BooleanContent
Enum that describes how the target represents true/false values.
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
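The setOperationAction, setLoadExtAction, and setTruncStoreAction hooks above are normally invoked from a target's TargetLowering constructor. The fragment below is a generic, hedged sketch of that pattern; the class name, the chosen opcodes, and the chosen types are illustrative and do not describe this file's configuration.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

namespace {
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // No native popcount on this imaginary target: expand it.
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    // i64 -> f64 conversion needs hand-written lowering: mark it Custom.
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    // Sign-extending i1 loads are handled by promoting the memory type.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
    // f64 -> f32 truncating stores are split into FP_ROUND plus a store.
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }
};
} // end anonymous namespace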
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
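A hedged sketch of the usual calling pattern for SimplifyDemandedBits (the wrapper function and its parameters are assumptions, not code from this file): demand only the bits a user actually needs, and commit the replacement recorded in the TargetLoweringOpt. Real DAG-combine callers normally go through DAGCombinerInfo::CommitTargetLoweringOpt rather than replacing uses directly.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

static bool simplifyLowBits(const TargetLowering &TLI, SelectionDAG &DAG,
                            SDValue Op, unsigned NeededLowBits) {
  unsigned BitWidth = Op.getScalarValueSizeInBits();
  APInt Demanded = APInt::getLowBitsSet(BitWidth, NeededLowBits);
  KnownBits Known;
  TargetLowering::TargetLoweringOpt TLO(DAG, /*LegalTypes=*/true,
                                        /*LegalOps=*/true);
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;
  // TLO records the proposed replacement node; splice it into the DAG.
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
  return true;
}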
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math flag is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fp-contract=xxx option.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
uint64_t getArrayNumElements() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
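As a small illustration of the Type queries above (the helper is an assumption, not part of this file), an integer type with the same scalar width as a floating-point type can be obtained like this:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Return an integer type matching the bit width of a scalar FP type,
// e.g. i16 for half, i32 for float, i64 for double; nullptr otherwise.
static Type *getEquivalentIntType(Type *Ty) {
  if (!Ty->isFloatingPointTy())
    return nullptr;
  return IntegerType::getIntNTy(Ty->getContext(), Ty->getScalarSizeInBits());
}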
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
unsigned getOperandNo() const
Return the operand # of this use in its User.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
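A short, hedged example of the Value and Use APIs above (the helper name is illustrative): when a value has exactly one use, the Use edge tells you which operand slot of the user it feeds.

#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Return the operand index of V's single use, or -1 if V has zero or
// multiple uses.
static int getSingleUseOperandNo(Value *V) {
  if (!V->hasOneUse())
    return -1;
  const Use &U = *V->use_begin();
  return U.getOperandNo();
}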
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the global base register value.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setFAIndex(int Index)
void setAMXProgModel(AMXProgModelEnum Model)
unsigned getVarArgsGPOffset() const
int getRegSaveFrameIndex() const
ArrayRef< size_t > getPreallocatedArgOffsets(const size_t Id)
void setIsSplitCSR(bool s)
unsigned getVarArgsFPOffset() const
int getRestoreBasePointerOffset() const
int getVarArgsFrameIndex() const
void setRestoreBasePointer(const MachineFunction *MF)
void setHasPreallocatedCall(bool v)
void incNumLocalDynamicTLSAccesses()
size_t getPreallocatedStackSize(const size_t Id)
void setRAIndex(int Index)
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getDarwinTLSCallPreservedMask() const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool canExtendTo512BW() const
bool isTargetMachO() const
bool useIndirectThunkBranches() const
bool isPICStyleGOT() const
const X86TargetLowering * getTargetLowering() const override
bool hasMFence() const
Use mfence if we have SSE2 or we're on x86-64 (even if we asked for no-sse2).
bool isPICStyleStubPIC() const
bool isTargetWindowsMSVC() const
bool canUseCMPXCHG8B() const
bool isTargetDarwin() const
bool isTargetWin64() const
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
const Triple & getTargetTriple() const
const X86InstrInfo * getInstrInfo() const override
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
bool canExtendTo512DQ() const
bool hasSSEPrefetch() const
bool canUseCMPXCHG16B() const
unsigned char classifyGlobalReference(const GlobalValue *GV, const Module &M) const
bool isPICStyleRIPRel() const
bool isTargetCygMing() const
unsigned char classifyLocalReference(const GlobalValue *GV) const
Classify a global variable reference for the current subtarget according to how we should reference i...
unsigned char classifyBlockAddressReference() const
Classify a blockaddress reference for the current subtarget according to how we should reference it i...
const X86RegisterInfo * getRegisterInfo() const override
bool isTargetWindowsGNU() const
unsigned getPreferVectorWidth() const
bool isTargetWindowsItanium() const
bool isTargetNaCl64() const
const X86FrameLowering * getFrameLowering() const override
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, const Module &M) const
Classify a global function reference for the current subtarget.
bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const override
Overflow nodes should get combined/lowered to optimal instructions (they should allow eliminating exp...
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const override
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool preferABDSToABSWithNSW(EVT VT) const override
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer, MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
Handle Lowering flag assembly outputs.
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const override
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth) const override
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
bool convertSelectOfConstantsToMath(EVT VT) const override
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint letter, return the type of constraint for this target.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool isLegalStoreImmediate(int64_t Imm) const override
Return true if the specified immediate is legal for the value input of a store instruction.
SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO, SDValue Ptr, SDValue Val, SDValue Mask) const override
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
bool isCtlzFast() const override
Return true if ctlz instruction is fast.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool supportSwiftError() const override
Return true if the target supports swifterror attribute.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool shouldSplatInsEltVarIndex(EVT VT) const override
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
bool isInlineAsmTargetBranch(const SmallVectorImpl< StringRef > &AsmStrs, unsigned OpNo) const override
On x86, return true if the operand with index OpNo is a CALL or JUMP instruction, which can use eithe...
MVT hasFastEqualityCompare(unsigned NumBits) const override
Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, const APInt &DemandedElts, unsigned MaskIndex, TargetLoweringOpt &TLO, unsigned Depth) const
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const override
bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, EVT VT) const override
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if we believe it is correct and profitable to reduce the load node to a smaller type.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool preferScalarizeSplat(SDNode *N) const override
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const override
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
bool isShuffleMaskLegal(ArrayRef< int > Mask, EVT VT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
bool useStackGuardXorFP() const override
If this function returns true, stack protection checks should XOR the frame pointer (or whichever poi...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
bool shouldScalarizeBinop(SDValue) const override
Scalar ops always have equal or better analysis/performance/power than the vector equivalent,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type Ty1 to type Ty2.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool areJTsAllowed(const Function *Fn) const override
Returns true if lowering to a jump table is allowed.
bool isCommutativeBinOp(unsigned Opcode) const override
Returns true if the opcode is a commutative binary operation.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const override
Returns preferred type for switch condition.
SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO, SDValue &NewLoad, SDValue Ptr, SDValue PassThru, SDValue Mask) const override
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool isVectorClearMaskLegal(ArrayRef< int > Mask, EVT VT) const override
Similar to isShuffleMaskLegal.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info, const char *Constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Customize the preferred legalization strategy for certain types.
bool shouldConvertPhiType(Type *From, Type *To) const override
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type Ty1 implicit zero-extends the valu...
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const override
TargetLowering::AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const override
Return the preferred fold type: Abs if this is a vector, AddAnd if it is an integer, None otherwise.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool addressingModeSupportsTLS(const GlobalValue &GV) const override
Returns true if the target's addressing mode can target thread local storage (TLS).
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isBinOp(unsigned Opcode) const override
Add x86-specific opcodes to the default list.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue unwrapAddress(SDValue N) const override
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI)
bool isVectorLoadExtDesirable(SDValue) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
unsigned getStackProbeSize(const MachineFunction &MF) const
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
Replace the results of node with an illegal result type with new values built out of custom code.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool needsFixedCatchObjects() const override
constexpr ScalarTy getFixedValue() const
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ X86_ThisCall
Similar to X86_StdCall.
@ X86_StdCall
stdcall is mostly used by the Win32 API.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ C
The default llvm calling convention, compatible with C.
@ X86_FastCall
'fast' analog of X86_StdCall.
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ FRAME_TO_ARGS_OFFSET
FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to first (possible) on-stack ar...
@ RESET_FPENV
Set floating-point environment to default state.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
@ EH_RETURN
OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents 'eh_return' gcc dwarf builtin,...
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ BR
Control flow instructions. These all have token chains.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
@ BR_JT
BR_JT - Jumptable branch.
@ GC_TRANSITION_START
GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the beginning and end of GC transition s...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ LOCAL_RECOVER
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ GET_FPENV_MEM
Gets the current floating-point environment.
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt); Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ SET_FPENV_MEM
Sets the current floating point environment.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum, the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
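The ISD opcodes above are the target-independent SelectionDAG node kinds that lowering and combining code creates and rewrites. As a minimal, hedged sketch (DAG, DL, VT, Cond, TrueVal and FalseVal are assumed to be provided by a surrounding lowering hook; they are not taken from this file), a node is built with SelectionDAG::getNode:
  // Illustrative only: select between two values, then add 1 with signed
  // saturation (VT is assumed to be a legal integer type here).
  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cond, TrueVal, FalseVal);
  SDValue Sat = DAG.getNode(ISD::SADDSAT, DL, VT, Sel,
                            DAG.getConstant(1, DL, VT));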
bool isExtVecInRegOpcode(unsigned Opcode)
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isExtOpcode(unsigned Opcode)
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a constant splat node (BUILD_VECTOR or SPLAT_VECTOR) whose elements are all the same constant, storing that constant in SplatValue.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
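The ISD predicates listed above are typically used to guard DAG combines. A hedged fragment, assuming N0/N1 are the operands of the node being combined and DAG, DL and VT are in scope:
  // Sketch: treat an all-zeros RHS (either a literal zero build vector or a
  // uniform constant splat of zero) as the identity for an OR-like combine.
  APInt SplatVal;
  bool RHSIsZeroVec = ISD::isBuildVectorAllZeros(N1.getNode());
  bool RHSIsSplat = ISD::isConstantSplatVector(N1.getNode(), SplatVal);
  if (RHSIsZeroVec || (RHSIsSplat && SplatVal.isZero()))
    return N0; // (or x, 0) -> x in this illustrative fold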
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match only the specific value V.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
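The m_* entries above come from the IR-level PatternMatch library, which the lowering code uses when inspecting intrinsic arguments and IR operands. A small, self-contained sketch (the helper name isAndNot is made up for illustration):
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Recognize (X & ~Y) with the operands in either order.
  static bool isAndNot(Value *V, Value *&X, Value *&Y) {
    // m_c_And matches commutatively; m_Not matches 'xor V, -1' in either order.
    return match(V, m_c_And(m_Value(X), m_Not(m_Value(Y)))); 
  }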
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
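The RTLIB getters above map a type pair to a runtime-library call. A hedged example of querying one (MVT values convert implicitly to EVT):
  // For an f128 -> i64 signed conversion this is expected to name a
  // __fixtfdi-style libcall; UNKNOWN_LIBCALL means no such call exists.
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f128, MVT::i64);
  bool HasLibcall = (LC != RTLIB::UNKNOWN_LIBCALL);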
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_TLSLD
MO_TLSLD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ MO_NTPOFF
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
@ MO_TPOFF
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
@ MO_TLVP_PIC_BASE
MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate is some TLS offset from the ...
@ MO_TLSGD
MO_TLSGD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ MO_TLVP
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
@ MO_SECREL
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
@ MO_DTPOFF
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_TLSLDM
MO_TLSLDM - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
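The X86II::MO_* flags annotate symbol operands with the addressing/relocation model chosen during lowering. A hedged fragment showing the common pattern (GV, DL, PtrVT and DAG are assumed to come from the surrounding lowering code):
  // Ask for a GOT-relative reference to the global, then wrap it so it is
  // materialized as a RIP-relative address in 64-bit PIC code.
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
                                            X86II::MO_GOTPCREL);
  SDValue Wrapped = DAG.getNode(X86ISD::WrapperRIP, DL, PtrVT, Addr);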
@ FST
This instruction implements a truncating store from FP stack slots.
@ CMPM
Vector comparison generating mask bits for fp and integer signed and unsigned data types.
@ FMAX
Floating point max and min.
@ BT
X86 bit-test instructions.
@ HADD
Integer horizontal add/sub.
@ MOVQ2DQ
Copies a 64-bit value from an MMX vector to the low word of an XMM vector, with the high word zero fi...
@ BLENDI
Blend where the selector is an immediate.
@ CMP
X86 compare and logical compare instructions.
@ BLENDV
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
@ ADDSUB
Combined add and sub on an FP vector.
@ STRICT_FMAX
Floating point max and min.
@ STRICT_CMPM
Vector comparison generating mask bits for fp and integer signed and unsigned data types.
@ FHADD
Floating point horizontal add/sub.
@ NT_BRIND
BRIND node with NoTrack prefix.
@ FSETCCM
X86 FP SETCC, similar to above, but with output as an i1 mask and a version with SAE.
@ PEXTRB
Extract an 8-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRB.
@ FXOR
Bitwise logical XOR of floating point values.
@ BRCOND
X86 conditional branches.
@ FSETCC
X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
@ PINSRB
Insert the lower 8-bits of a 32-bit value to a vector, corresponds to X86::PINSRB.
@ INSERTPS
Insert any element of a 4 x float vector into any element of a destination 4 x float vector.
@ PSHUFB
Shuffle 16 8-bit values within a vector.
@ PEXTRW
Extract a 16-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRW.
@ AADD
RAO arithmetic instructions.
@ FANDN
Bitwise logical ANDNOT of floating point values.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ FMAXC
Commutative FMIN and FMAX.
@ EXTRQI
SSE4A Extraction and Insertion.
@ FLD
This instruction implements an extending load to FP stack slots.
@ PSADBW
Compute Sum of Absolute Differences.
@ FOR
Bitwise logical OR of floating point values.
@ FIST
This instruction implements an fp->int store from FP stack slots.
@ FP_TO_INT_IN_MEM
This instruction implements FP_TO_SINT with the integer destination in memory and a FP reg source.
@ LADD
LOCK-prefixed arithmetic read-modify-write instructions.
@ MMX_MOVW2D
Copies a GPR into the low 32-bit word of an MMX vector and zeroes out the high word.
@ Wrapper
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress,...
@ PINSRW
Insert the lower 16-bits of a 32-bit value to a vector, corresponds to X86::PINSRW.
@ CMPCCXADD
Compare and Add if Condition is Met.
@ MMX_MOVD2W
Copies a 32-bit value from the low word of an MMX vector to a GPR.
@ FILD
This instruction implements SINT_TO_FP with the integer source in memory and FP reg result.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
@ ANDNP
Bitwise Logical AND NOT of Packed FP values.
@ VAARG_64
These instructions grab the address of the next argument from a va_list.
@ FAND
Bitwise logical AND of floating point values.
@ CMOV
X86 conditional moves.
@ WrapperRIP
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
@ FSHL
X86 funnel/double shift i16 instructions.
@ FRSQRT
Floating point reciprocal-sqrt and reciprocal approximation.
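The X86ISD opcodes above are the target-specific nodes this lowering produces. A hedged sketch of a compare feeding a conditional move, following the operand order used by the in-tree lowering (LHS, RHS, VT, TrueVal and FalseVal are assumed; treat the exact operands as illustrative):
  // CMP produces EFLAGS; CMOV consumes (FalseVal, TrueVal, CondCode, EFLAGS).
  SDValue Cmp  = DAG.getNode(X86ISD::CMP, DL, MVT::i32, LHS, RHS);
  SDValue CC   = DAG.getTargetConstant(X86::COND_E, DL, MVT::i8);
  SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseVal, TrueVal, CC, Cmp);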
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, const X86Subtarget &Subtarget, bool AssumeSingleUse=false)
Check if Op is a load operation that could be folded into a vector splat instruction as a memory oper...
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or floating point constant +0.0.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
bool mayFoldIntoZeroExtend(SDValue Op)
Check if Op is an operation that could be folded into a zero extend x86 instruction.
bool mayFoldIntoStore(SDValue Op)
Check if Op is a value that could be used to fold a store into some other x86 instruction as a memory...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
int getCCMPCondFlagsFromCondCode(CondCode CC)
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, bool AssumeSingleUse=false)
Check if Op is a load operation that could be folded into some other x86 instruction as a memory oper...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement)
Returns true if the given offset can fit into the displacement field of the instruction.
bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs)
If Op is a constant whose elements are all the same constant or undefined, return true and return the...
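The X86:: helpers above answer small folding questions during instruction selection and combining. A hedged fragment (Op and Subtarget are assumed to be supplied by the caller):
  // Only leave the operand in memory when it is a foldable load and not a
  // zero constant that would be cheaper to rematerialize.
  bool KeepInMemory = X86::mayFoldLoad(Op, Subtarget) && !X86::isZeroNode(Op);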
initializer< Ty > init(const Ty &Val)
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, unsigned NumDstElts, bool IsAnyExtend, SmallVectorImpl< int > &ShuffleMask)
Decode a zero extension instruction as a shuffle mask.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void DecodeZeroMoveLowMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
Decode a move lower and zero upper instruction as a shuffle mask.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshuflw.
static const IntrinsicData * getIntrinsicWithChain(unsigned IntNo)
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ SjLj
setjmp/longjmp based exceptions
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or an FP constant.
static void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, unsigned Reg)
Replace the address used in the instruction with the direct memory reference.
void DecodeVPERMV3Mask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void DecodeBLENDMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decode a BLEND immediate mask into a shuffle mask.
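The Decode* helpers in this group turn fixed X86 shuffle encodings into generic shuffle-mask vectors. A hedged example for a 4-element BLEND immediate (assumes the X86 shuffle decode header and SmallVector are available):
  SmallVector<int, 4> Mask;
  DecodeBLENDMask(/*NumElts=*/4, /*Imm=*/0x5, Mask);
  // Set immediate bits select from the second input (indices offset by
  // NumElts), so the expected mask here is {4, 1, 6, 3}.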
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decode a shuffle packed values at 128-bit granularity (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) immed...
void DecodeVPERMMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for VPERMQ/VPERMPD.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl< int > &ShuffleMask)
Decode a SSE4A EXTRQ instruction as a shuffle mask.
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
static const IntrinsicData * getIntrinsicWithoutChain(unsigned IntNo)
auto unique(Range &&R, Predicate P)
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl< int > &ShuffleMask, bool SrcIsMem)
Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask.
void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
unsigned M1(unsigned Val)
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
Decode a MOVLHPS instruction as a v2f64/v4f32 shuffle mask.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
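The bit-math helpers listed here (isPowerOf2_*, Log2_*, countr_zero and friends) show up when turning sizes and alignments into shift amounts. A tiny, self-contained example (the helper name sizeToShift is invented for illustration):
  #include "llvm/Support/MathExtras.h"

  // Convert a power-of-two element size into a shift amount; the guard
  // mirrors the documented precondition of Log2_64. Log2_64(32) is 5.
  static unsigned sizeToShift(uint64_t Size) {
    return llvm::isPowerOf2_64(Size) ? llvm::Log2_64(Size) : 0;
  }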
void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
void createUnpackShuffleMask(EVT VT, SmallVectorImpl< int > &Mask, bool Lo, bool Unary)
Generate unpacklo/unpackhi shuffle mask.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl< int > &ShuffleMask)
Decode a SSE4A INSERTQ instruction as a shuffle mask.
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
void DecodeVPERMVMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
static void verifyIntrinsicTables()
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl< int > &Mask, bool Lo)
Similar to unpacklo/unpackhi, but without the 128-bit lane limitation imposed by AVX and specific to ...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, SmallVectorImpl< int > &ShuffleMask)
Decode a scalar float move instruction as a shuffle mask.
bool isNullConstantOrUndef(SDValue V)
Returns true if V is a constant integer zero or an UNDEF node.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
static X86AddressMode getAddressFromInstr(const MachineInstr *MI, unsigned Operand)
Compute the addressing mode from a machine instruction, starting with the given operand.
void DecodeVPPERMMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPPERM mask from a raw array of constants such as from BUILD_VECTOR.
DWARFExpression::Operation Op
void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ TowardNegative
roundTowardNegative.
unsigned M0(unsigned Val)
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*.
void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
static uint32_t extractBits(uint64_t Val, uint32_t Hi, uint32_t Lo)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
const char * toString(DWARFSectionKind Kind)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
@ INTR_TYPE_SCALAR_MASK_SAE
@ INTR_TYPE_3OP_SCALAR_MASK_SAE
@ INTR_TYPE_SCALAR_MASK_RND
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps.
void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
void DecodeVectorBroadcast(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
Decodes a broadcast of the first element of a vector.
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for shufp*.
void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshufhw.
void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void DecodePSHUFBMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a PSHUFB mask from a raw array of constants such as from BUILD_VECTOR.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
static const fltSemantics & x87DoubleExtended() LLVM_READNONE
static const fltSemantics & IEEEquad() LLVM_READNONE
static unsigned int semanticsPrecision(const fltSemantics &)
static const fltSemantics & IEEEdouble() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
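The fltSemantics accessors and rounding modes above drive APFloat-based constant folding. A self-contained, hedged sketch of narrowing a double constant to single precision (the helper name narrowToFloat is invented):
  #include "llvm/ADT/APFloat.h"
  using namespace llvm;

  static float narrowToFloat(double D, bool &LosesInfo) {
    APFloat V(D); // starts in IEEEdouble semantics
    APFloat::opStatus St =
        V.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
    (void)St; // opInexact etc. report how the rounding went
    return V.convertToFloat();
  }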
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
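The EVT queries above are how lowering code inspects and re-derives value types. A hedged, self-contained example (the function name evtExample is illustrative):
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  static void evtExample(LLVMContext &Ctx) {
    EVT VT = EVT::getVectorVT(Ctx, MVT::f32, 8);        // v8f32
    unsigned NumElts = VT.getVectorNumElements();       // 8
    bool Is256 = VT.is256BitVector();                   // true
    EVT IntVT = VT.changeVectorElementTypeToInteger();  // v8i32
    (void)NumElts; (void)Is256; (void)IntVT;
  }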
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits sadd_sat(const KnownBits &LHS, const KnownBits &RHS)
Compute knownbits resulting from llvm.sadd.sat(LHS, RHS)
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
bool isZero() const
Returns true if value is all zero.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
bool isUnknown() const
Returns true if we don't know any bits.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
void setAllZero()
Make all bits known to be zero and discard any previous information.
unsigned getBitWidth() const
Get the bit width of this value.
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
bool isConstant() const
Returns true if we know the value of all bits.
void resetAll()
Resets the known state of all bits.
static KnownBits abdu(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for abdu(LHS, RHS).
KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const
Return a subset of the known bits from [bitPosition,bitPosition+numBits).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
bool isNegative() const
Returns true if this value is known to be negative.
void setAllOnes()
Make all bits known to be one and discard any previous information.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
bool isAllOnes() const
Returns true if value is all one bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
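KnownBits is the lattice used by the computeKnownBits-style analyses referenced throughout this file. A self-contained, hedged example of combining two fully known values (the helper name knownSum is invented):
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static uint64_t knownSum() {
    // Both inputs are exact constants, so the sum is fully known too.
    KnownBits X = KnownBits::makeConstant(APInt(8, 6));
    KnownBits Y = KnownBits::makeConstant(APInt(8, 1));
    KnownBits Sum = KnownBits::add(X, Y);
    return Sum.isConstant() ? Sum.getConstant().getZExtValue() : 0; // 7
  }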
This class contains a discriminated union of information about pointers in memory operands,...
bool isDereferenceable(unsigned Size, LLVMContext &C, const DataLayout &DL) const
Return true if memory region [V, V+Offset+Size) is known to be dereferenceable.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
std::string ConstraintCode
This contains the actual string for the code, like "m".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool recursivelyDeleteUnusedNodes(SDNode *N)
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
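The DAGCombinerInfo interface above is what target combines receive. A minimal, hedged skeleton in the style of the combine* functions in this file (the function name and the trivial integer fold are invented for illustration, not taken from the source):
  // Hypothetical: fold (sub x, x) -> 0 for integer types, but only before
  // operation legalization so later phases still see canonical nodes.
  static SDValue combineTrivialSub(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI) {
    if (!DCI.isBeforeLegalizeOps())
      return SDValue();
    if (N->getOperand(0) != N->getOperand(1) ||
        !N->getValueType(0).isInteger())
      return SDValue();
    return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
  }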
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
X86AddressMode - This struct holds a generalized full x86 address mode.