diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index da7323855b813..490d1a34cc846 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -103,6 +103,25 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver {
   /// \return The known alignment for the pointer-like value \p R.
   Align computeKnownAlignment(Register R, unsigned Depth = 0);
 
+  /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+  /// that are all less than the element bit-width of the shift node, return the
+  /// valid constant range. Only the vector lanes set in \p DemandedElts are
+  /// inspected.
+  ///
+  /// \return std::nullopt if the shift amounts are not known to be in range.
+  std::optional<ConstantRange>
+  getValidShiftAmountRange(Register R, const APInt &DemandedElts,
+                           unsigned Depth = 0);
+
+  /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+  /// that are all less than the element bit-width of the shift node, return the
+  /// minimum possible value.
+  ///
+  /// \return std::nullopt if no valid shift amount range is known.
+  std::optional<uint64_t> getValidMinimumShiftAmount(Register R,
+                                                     const APInt &DemandedElts,
+                                                     unsigned Depth = 0);
+
   /// Determine which floating-point classes are valid for \p V, and return them
   /// in KnownFPClass bit sets.
   ///
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 1286af864fb3f..974fc40de6222 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -1884,6 +1884,16 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
     }
     break;
   }
+  case TargetOpcode::G_ASHR: {
+    // Each position shifted by adds one more copy of the sign bit, so the
+    // minimum valid shift amount is added to the sign bits of the source.
+    Register Src1 = MI.getOperand(1).getReg();
+    Register Src2 = MI.getOperand(2).getReg();
+    FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
+    if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
+      FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
+    break;
+  }
   case TargetOpcode::G_TRUNC: {
     Register Src = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(Src);
@@ -2053,6 +2063,72 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
   return computeNumSignBits(R, DemandedElts, Depth);
 }
 
+std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
+    Register R, const APInt &DemandedElts, unsigned Depth) {
+  // Shifting more than the bitwidth is not valid.
+  // NOTE(review): the width is taken from the type of the amount register R;
+  // presumably this is expected to match the shifted value's width - confirm.
+  MachineInstr &MI = *MRI.getVRegDef(R);
+  unsigned Opcode = MI.getOpcode();
+
+  LLT Ty = MRI.getType(R);
+  unsigned BitWidth = Ty.getScalarSizeInBits();
+
+  // A constant amount yields a single-element range.
+  if (Opcode == TargetOpcode::G_CONSTANT) {
+    const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
+    if (ShAmt.uge(BitWidth))
+      return std::nullopt;
+    return ConstantRange(ShAmt);
+  }
+
+  // For a vector of constants, track the smallest and largest demanded
+  // amount. A non-constant demanded element defers to known bits below.
+  if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
+    const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
+    for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
+      if (!DemandedElts[I])
+        continue;
+      // Operand 0 is the result, so element I is operand I + 1.
+      MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
+      if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
+        MinAmt = MaxAmt = nullptr;
+        break;
+      }
+
+      const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
+      if (ShAmt.uge(BitWidth))
+        return std::nullopt;
+      if (!MinAmt || MinAmt->ugt(ShAmt))
+        MinAmt = &ShAmt;
+      if (!MaxAmt || MaxAmt->ult(ShAmt))
+        MaxAmt = &ShAmt;
+    }
+    assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
+           "Failed to find matching min/max shift amounts");
+    // The upper bound of a ConstantRange is exclusive, hence the + 1.
+    if (MinAmt && MaxAmt)
+      return ConstantRange(*MinAmt, *MaxAmt + 1);
+  }
+
+  // Use computeKnownBits to find a hidden constant/knownbits (usually type
+  // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
+  KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
+  if (KnownAmt.getMaxValue().ult(BitWidth))
+    return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
+
+  return std::nullopt;
+}
+
+std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
+    Register R, const APInt &DemandedElts, unsigned Depth) {
+  // The unsigned minimum of the valid range is the smallest possible amount.
+  if (std::optional<ConstantRange> AmtRange =
+          getValidShiftAmountRange(R, DemandedElts, Depth))
+    return AmtRange->getUnsignedMin().getZExtValue();
+  return std::nullopt;
+}
+
 void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
     AnalysisUsage &AU) const {
   AU.setPreservesAll();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir new file mode 100644 index 0000000000000..8552931c1f4c0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir @@ -0,0 +1,109 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -o - 2>&1 | FileCheck %s + +--- +name: Cst +body: | + bb.1: + ; CHECK-LABEL: name: @Cst + ; CHECK-NEXT: %0:_ KnownBits:10000000 SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:11110000 SignBits:4 + %0:_(s8) = G_CONSTANT i8 128 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ASHR %0, %1 +... +--- +name: CstBig +body: | + bb.1: + ; CHECK-LABEL: name: @CstBig + ; CHECK-NEXT: %0:_ KnownBits:11111000 SignBits:5 + ; CHECK-NEXT: %1:_ KnownBits:00000110 SignBits:5 + ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8 + %0:_(s8) = G_CONSTANT i8 248 + %1:_(s8) = G_CONSTANT i8 6 + %2:_(s8) = G_ASHR %0, %1 +... +--- +name: ScalarVar +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarVar + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = COPY $b1 + %2:_(s8) = G_ASHR %0, %1 +...
+--- +name: ScalarCst +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarCst + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:4 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ASHR %0, %1 +... +--- +name: VectorVar +body: | + bb.1: + ; CHECK-LABEL: name: @VectorVar + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = COPY $d1 + %2:_(<4 x s16>) = G_ASHR %0, %1 +... +--- +name: VectorCst +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:4 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ASHR %0, %2 +... +--- +name: VectorCst36 +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst36 + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000110 SignBits:13 + ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:4 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(s16) = G_CONSTANT i16 6 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1 + %4:_(<4 x s16>) = G_ASHR %0, %3 +... +--- +name: VectorCst3unknown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst3unknown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? 
SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %2:_(s16) = COPY $h0 + %1:_(s16) = G_CONSTANT i16 3 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1 + %4:_(<4 x s16>) = G_ASHR %0, %3 +... diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll index be79135c8b831..747db396bc807 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -14,10 +14,10 @@ define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) { ; CHECK-GI-LABEL: dupsext_v8i8_v8i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: lsl w8, w0, #8 -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8 ; CHECK-GI-NEXT: dup v1.8h, w8 -; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: xtn v1.8b, v1.8h +; CHECK-GI-NEXT: smull v0.8h, v1.8b, v0.8b ; CHECK-GI-NEXT: ret entry: %in = sext i8 %src to i16 diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 2f23a32c36a9f..6e5c666bdbc75 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -2264,33 +2264,12 @@ define <2 x i64> @lsr_const(<2 x i64> %a, <2 x i64> %b) { } define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) { -; CHECK-NEON-LABEL: asr: -; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-NEON-NEXT: shrn v1.2s, v1.2d, #32 -; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-NEON-NEXT: ret -; -; CHECK-SVE-LABEL: asr: -; CHECK-SVE: // %bb.0: -; CHECK-SVE-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-SVE-NEXT: shrn v1.2s, v1.2d, #32 -; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-SVE-NEXT: ret -; -; CHECK-GI-LABEL: asr: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32 -; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #32 -; CHECK-GI-NEXT: fmov x10, d0 -; CHECK-GI-NEXT: fmov x11, d1 -; CHECK-GI-NEXT: mov x8, v0.d[1] -; CHECK-GI-NEXT: mov x9, 
v1.d[1] -; CHECK-GI-NEXT: mul x10, x10, x11 -; CHECK-GI-NEXT: mul x8, x8, x9 -; CHECK-GI-NEXT: fmov d0, x10 -; CHECK-GI-NEXT: mov v0.d[1], x8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: asr: +; CHECK: // %bb.0: +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: ret %x = ashr <2 x i64> %a, <i64 32, i64 32> %y = ashr <2 x i64> %b, <i64 32, i64 32> %z = mul nsw <2 x i64> %x, %y @@ -2298,34 +2277,12 @@ define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) { } define <2 x i64> @asr_const(<2 x i64> %a, <2 x i64> %b) { -; CHECK-NEON-LABEL: asr_const: -; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: movi v1.2s, #31 -; CHECK-NEON-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-NEON-NEXT: ret -; -; CHECK-SVE-LABEL: asr_const: -; CHECK-SVE: // %bb.0: -; CHECK-SVE-NEXT: movi v1.2s, #31 -; CHECK-SVE-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-SVE-NEXT: ret -; -; CHECK-GI-LABEL: asr_const: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI81_0 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI81_0] -; CHECK-GI-NEXT: fmov x10, d0 -; CHECK-GI-NEXT: fmov x11, d1 -; CHECK-GI-NEXT: mov x8, v0.d[1] -; CHECK-GI-NEXT: mov x9, v1.d[1] -; CHECK-GI-NEXT: mul x10, x10, x11 -; CHECK-GI-NEXT: mul x8, x8, x9 -; CHECK-GI-NEXT: fmov d0, x10 -; CHECK-GI-NEXT: mov v0.d[1], x8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: asr_const: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2s, #31 +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: ret %x = ashr <2 x i64> %a, <i64 32, i64 32> %z = mul nsw <2 x i64> %x, <i64 31, i64 31> ret <2 x i64> %z diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index 2b7fa085cf603..e1ba0e98a6c01 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1631,7 +1631,6 @@ define i8 @combine_i8_sdiv_const100(i8 %x) { ; CHECK-GI-NEXT: 
sxtb w8, w0 ; CHECK-GI-NEXT: mov w9, #41 // =0x29 ; CHECK-GI-NEXT: mul w8, w8, w9 -; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8 ; CHECK-GI-NEXT: asr w8, w8, #4 ; CHECK-GI-NEXT: ubfx w9, w8, #7, #1 diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index b124042265d40..c57383ad9b1e7 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -52,7 +52,6 @@ define i8 @si8_100(i8 %a, i8 %b) { ; CHECK-GI-NEXT: sxtb w8, w0 ; CHECK-GI-NEXT: mov w9, #41 // =0x29 ; CHECK-GI-NEXT: mul w8, w8, w9 -; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8 ; CHECK-GI-NEXT: asr w8, w8, #4 ; CHECK-GI-NEXT: ubfx w9, w8, #7, #1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir index cd69104851560..69e3561b362eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir @@ -80,8 +80,7 @@ body: | ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; ; GFX9-LABEL: name: test_smulh_s16 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -93,8 +92,7 @@ body: | ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -200,9 +198,7 @@ body: | ; GFX9-NEXT: 
[[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 16 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]] ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR1]], 16 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir index 2c545c89da218..1025d605f35f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir @@ -92,8 +92,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 20 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: $vgpr0 = COPY [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 16 %2:_(s32) = G_ASHR %0, %1(s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll index 4b999b892ed35..6dc330f111bc3 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll @@ -414,8 +414,7 @@ define i32 @sdiv_constant_srai(i32 %a) nounwind { ; RV64-NEXT: addi a1, a1, 1639 ; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: mul a0, a0, a1 -; RV64-NEXT: srai a0, a0, 32 -; RV64-NEXT: sraiw a0, a0, 1 +; RV64-NEXT: srai 
a0, a0, 33 ; RV64-NEXT: srliw a1, a0, 31 ; RV64-NEXT: addw a0, a0, a1 ; RV64-NEXT: ret @@ -656,8 +655,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; RV32IM-NEXT: srai a0, a0, 24 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: slli a0, a0, 16 -; RV32IM-NEXT: srai a0, a0, 24 -; RV32IM-NEXT: slli a0, a0, 24 ; RV32IM-NEXT: srai a0, a0, 25 ; RV32IM-NEXT: zext.b a1, a0 ; RV32IM-NEXT: srli a1, a1, 7 @@ -670,9 +667,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; RV32IMZB-NEXT: sext.b a0, a0 ; RV32IMZB-NEXT: mul a0, a0, a1 ; RV32IMZB-NEXT: sext.h a0, a0 -; RV32IMZB-NEXT: srai a0, a0, 8 -; RV32IMZB-NEXT: sext.b a0, a0 -; RV32IMZB-NEXT: srai a0, a0, 1 +; RV32IMZB-NEXT: srai a0, a0, 9 ; RV32IMZB-NEXT: zext.b a1, a0 ; RV32IMZB-NEXT: srli a1, a1, 7 ; RV32IMZB-NEXT: add a0, a0, a1 @@ -685,8 +680,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; RV64IM-NEXT: srai a0, a0, 56 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srai a0, a0, 56 -; RV64IM-NEXT: slli a0, a0, 56 ; RV64IM-NEXT: srai a0, a0, 57 ; RV64IM-NEXT: zext.b a1, a0 ; RV64IM-NEXT: srli a1, a1, 7 @@ -699,9 +692,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; RV64IMZB-NEXT: sext.b a0, a0 ; RV64IMZB-NEXT: mul a0, a0, a1 ; RV64IMZB-NEXT: sext.h a0, a0 -; RV64IMZB-NEXT: srai a0, a0, 8 -; RV64IMZB-NEXT: sext.b a0, a0 -; RV64IMZB-NEXT: srai a0, a0, 1 +; RV64IMZB-NEXT: srai a0, a0, 9 ; RV64IMZB-NEXT: zext.b a1, a0 ; RV64IMZB-NEXT: srli a1, a1, 7 ; RV64IMZB-NEXT: add a0, a0, a1 @@ -906,8 +897,6 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind { ; RV32IM-NEXT: addi a1, a1, 1639 ; RV32IM-NEXT: srai a0, a0, 16 ; RV32IM-NEXT: mul a0, a0, a1 -; RV32IM-NEXT: srai a0, a0, 16 -; RV32IM-NEXT: slli a0, a0, 16 ; RV32IM-NEXT: srai a0, a0, 17 ; RV32IM-NEXT: slli a1, a0, 16 ; RV32IM-NEXT: srli a1, a1, 16 @@ -921,9 +910,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind { ; RV32IMZB-NEXT: addi a1, a1, 1639 ; RV32IMZB-NEXT: sext.h a0, a0 ; RV32IMZB-NEXT: mul a0, a0, a1 -; 
RV32IMZB-NEXT: srai a0, a0, 16 -; RV32IMZB-NEXT: sext.h a0, a0 -; RV32IMZB-NEXT: srai a0, a0, 1 +; RV32IMZB-NEXT: srai a0, a0, 17 ; RV32IMZB-NEXT: zext.h a1, a0 ; RV32IMZB-NEXT: srli a1, a1, 15 ; RV32IMZB-NEXT: add a0, a0, a1 @@ -936,9 +923,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind { ; RV64IM-NEXT: addi a1, a1, 1639 ; RV64IM-NEXT: srai a0, a0, 48 ; RV64IM-NEXT: mul a0, a0, a1 -; RV64IM-NEXT: sraiw a0, a0, 16 -; RV64IM-NEXT: slli a0, a0, 48 -; RV64IM-NEXT: srai a0, a0, 49 +; RV64IM-NEXT: sraiw a0, a0, 17 ; RV64IM-NEXT: slli a1, a0, 48 ; RV64IM-NEXT: srli a1, a1, 48 ; RV64IM-NEXT: srli a1, a1, 15 @@ -951,9 +936,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind { ; RV64IMZB-NEXT: addi a1, a1, 1639 ; RV64IMZB-NEXT: sext.h a0, a0 ; RV64IMZB-NEXT: mul a0, a0, a1 -; RV64IMZB-NEXT: sraiw a0, a0, 16 -; RV64IMZB-NEXT: sext.h a0, a0 -; RV64IMZB-NEXT: srai a0, a0, 1 +; RV64IMZB-NEXT: sraiw a0, a0, 17 ; RV64IMZB-NEXT: zext.h a1, a0 ; RV64IMZB-NEXT: srli a1, a1, 15 ; RV64IMZB-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir index 78a2227b84a3a..a7c1c6355bff6 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir @@ -88,8 +88,7 @@ body: | ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASSERT_SEXT]], [[ASHR]] ; RV64I-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 32 ; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SEXT_INREG]], [[ASHR]] - ; RV64I-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[XOR]], 32 - ; RV64I-NEXT: $x10 = COPY [[SEXT_INREG1]](s64) + ; RV64I-NEXT: $x10 = COPY [[XOR]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 ; ; RV64ZBB-LABEL: name: abs_i32 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 9690302552090..735c6ccb6dca6 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -1053,9 +1053,8 @@ define signext i32 @abs_i32_sext(i32 signext %x) { ; RV64I-LABEL: abs_i32_sext: ; RV64I: # %bb.0: ; RV64I-NEXT: srai a1, a0, 31 -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: abs_i32_sext: