Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ class BoUpSLP;

struct SLPVectorizerPass : public OptionalPassInfoMixin<SLPVectorizerPass> {
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = SmallMapVector<Value *, StoreList, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
using GEPList = SmallVector<GetElementPtrInst *, 8>;
using GEPListMap = SmallMapVector<Value *, GEPList, 8>;
using GEPListMap = MapVector<Value *, GEPList>;
using InstSetVector = SmallSetVector<Instruction *, 8>;

ScalarEvolution *SE = nullptr;
Expand Down
808 changes: 204 additions & 604 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/WebAssembly/slp-memory-interleave.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=slp-vectorizer %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s

%struct.TwoBytes = type { i8, i8 }
%struct.FourBytes = type { i8, i8, i8, i8 }
Expand Down
129 changes: 98 additions & 31 deletions llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,105 @@ target triple = "aarch64"
define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) {
; CHECK-LABEL: define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii
; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: .preheader.i:
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP2]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: br label [[DOTPREHEADER_I:%.*]]
; CHECK: .preheader.i:
; CHECK-NEXT: [[DOT027_I:%.*]] = phi ptr [ [[TMP0]], [[TMP4:%.*]] ], [ [[TMP23:%.*]], [[DOTPREHEADER_I]] ]
; CHECK-NEXT: [[DOT01926_I:%.*]] = phi i32 [ 0, [[TMP4]] ], [ [[TMP26:%.*]], [[DOTPREHEADER_I]] ]
; CHECK-NEXT: [[DOT02025_I:%.*]] = phi float [ 0.000000e+00, [[TMP4]] ], [ [[TMP25:%.*]], [[DOTPREHEADER_I]] ]
; CHECK-NEXT: [[DOT02124_I:%.*]] = phi ptr [ [[TMP1]], [[TMP4]] ], [ [[TMP24:%.*]], [[DOTPREHEADER_I]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[DOT027_I]], i64 80
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[DOT02124_I]], i64 80
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP11:%.*]] = load <20 x float>, ptr [[DOT027_I]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP12:%.*]] = load <20 x float>, ptr [[DOT02124_I]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <22 x float> poison, float [[TMP8]], i64 20
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <22 x float> [[TMP13]], float [[DOT02025_I]], i64 21
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <20 x float> [[TMP11]], <20 x float> poison, <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <22 x float> [[TMP15]], <22 x float> [[TMP14]], <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 42, i32 43>
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <22 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0.000000e+00>, float [[TMP10]], i64 20
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <20 x float> [[TMP12]], <20 x float> poison, <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <22 x float> [[TMP18]], <22 x float> [[TMP17]], <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 42, i32 43>
; CHECK-NEXT: [[TMP20:%.*]] = fsub <22 x float> [[TMP16]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <22 x float> [[TMP20]], float 1.000000e+00, i64 21
; CHECK-NEXT: [[TMP22:%.*]] = fmul <22 x float> [[TMP20]], [[TMP21]]
; CHECK-NEXT: [[TMP23]] = getelementptr inbounds [4 x i8], ptr [[DOT027_I]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP24]] = getelementptr inbounds [4 x i8], ptr [[DOT02124_I]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP25]] = tail call fast float @llvm.vector.reduce.fadd.v22f32(float 0.000000e+00, <22 x float> [[TMP22]])
; CHECK-NEXT: [[TMP26]] = add nuw nsw i32 [[DOT01926_I]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[TMP26]], 7
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZL6REDUCEILI7EEFPKFS1_II_EXIT:%.*]], label [[DOTPREHEADER_I]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: _ZL6reduceILi7EEfPKfS1_ii.exit:
; CHECK-NEXT: ret float [[TMP25]]
; CHECK-NEXT: [[TMP6:%.*]] = load <20 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = load <20 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <20 x float> [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <20 x float> [[TMP8]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 80
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP14:%.*]] = fsub fast float [[TMP11]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast float [[TMP14]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP1]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP15]], <20 x float> [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = load <20 x float>, ptr [[TMP16]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP19:%.*]] = load <20 x float>, ptr [[TMP17]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP20:%.*]] = fsub fast <20 x float> [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <20 x float> [[TMP20]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 80
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 80
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP26:%.*]] = fsub fast float [[TMP23]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fmul fast float [[TMP26]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP16]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP17]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX_1:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP27]], <20 x float> [[TMP21]])
; CHECK-NEXT: [[OP_RDX3_1:%.*]] = fadd fast float [[OP_RDX_1]], [[OP_RDX]]
; CHECK-NEXT: [[TMP30:%.*]] = load <20 x float>, ptr [[TMP28]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP31:%.*]] = load <20 x float>, ptr [[TMP29]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP32:%.*]] = fsub fast <20 x float> [[TMP30]], [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = fmul fast <20 x float> [[TMP32]], [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP28]], i64 80
; CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP29]], i64 80
; CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP38:%.*]] = fsub fast float [[TMP35]], [[TMP37]]
; CHECK-NEXT: [[TMP39:%.*]] = fmul fast float [[TMP38]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP28]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP29]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX_2:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP39]], <20 x float> [[TMP33]])
; CHECK-NEXT: [[OP_RDX3_2:%.*]] = fadd fast float [[OP_RDX_2]], [[OP_RDX3_1]]
; CHECK-NEXT: [[TMP42:%.*]] = load <20 x float>, ptr [[TMP40]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP43:%.*]] = load <20 x float>, ptr [[TMP41]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP44:%.*]] = fsub fast <20 x float> [[TMP42]], [[TMP43]]
; CHECK-NEXT: [[TMP45:%.*]] = fmul fast <20 x float> [[TMP44]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP40]], i64 80
; CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP46]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP41]], i64 80
; CHECK-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float [[TMP47]], [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = fmul fast float [[TMP50]], [[TMP50]]
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP40]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP41]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX_3:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP51]], <20 x float> [[TMP45]])
; CHECK-NEXT: [[OP_RDX3_3:%.*]] = fadd fast float [[OP_RDX_3]], [[OP_RDX3_2]]
; CHECK-NEXT: [[TMP54:%.*]] = load <20 x float>, ptr [[TMP52]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP55:%.*]] = load <20 x float>, ptr [[TMP53]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP56:%.*]] = fsub fast <20 x float> [[TMP54]], [[TMP55]]
; CHECK-NEXT: [[TMP57:%.*]] = fmul fast <20 x float> [[TMP56]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 80
; CHECK-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP58]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP53]], i64 80
; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP60]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP62:%.*]] = fsub fast float [[TMP59]], [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP62]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP52]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP53]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX_4:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP63]], <20 x float> [[TMP57]])
; CHECK-NEXT: [[OP_RDX3_4:%.*]] = fadd fast float [[OP_RDX_4]], [[OP_RDX3_3]]
; CHECK-NEXT: [[TMP66:%.*]] = load <20 x float>, ptr [[TMP64]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP67:%.*]] = load <20 x float>, ptr [[TMP65]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP68:%.*]] = fsub fast <20 x float> [[TMP66]], [[TMP67]]
; CHECK-NEXT: [[TMP69:%.*]] = fmul fast <20 x float> [[TMP68]], [[TMP68]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP64]], i64 80
; CHECK-NEXT: [[TMP71:%.*]] = load float, ptr [[TMP70]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP65]], i64 80
; CHECK-NEXT: [[TMP73:%.*]] = load float, ptr [[TMP72]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP74:%.*]] = fsub fast float [[TMP71]], [[TMP73]]
; CHECK-NEXT: [[TMP75:%.*]] = fmul fast float [[TMP74]], [[TMP74]]
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP64]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP65]], i64 [[TMP4]]
; CHECK-NEXT: [[OP_RDX_5:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP75]], <20 x float> [[TMP69]])
; CHECK-NEXT: [[OP_RDX3_5:%.*]] = fadd fast float [[OP_RDX_5]], [[OP_RDX3_4]]
; CHECK-NEXT: [[TMP78:%.*]] = load <20 x float>, ptr [[TMP76]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP79:%.*]] = load <20 x float>, ptr [[TMP77]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP80:%.*]] = fsub fast <20 x float> [[TMP78]], [[TMP79]]
; CHECK-NEXT: [[TMP81:%.*]] = fmul fast <20 x float> [[TMP80]], [[TMP80]]
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP76]], i64 80
; CHECK-NEXT: [[TMP83:%.*]] = load float, ptr [[TMP82]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP77]], i64 80
; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP84]], align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP86:%.*]] = fsub fast float [[TMP83]], [[TMP85]]
; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], [[TMP86]]
; CHECK-NEXT: [[OP_RDX_6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP87]], <20 x float> [[TMP81]])
; CHECK-NEXT: [[OP_RDX3_6:%.*]] = fadd fast float [[OP_RDX_6]], [[OP_RDX3_5]]
; CHECK-NEXT: ret float [[OP_RDX3_6]]
;
%5 = alloca ptr, align 8
%6 = alloca ptr, align 8
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {

define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[X210:%.*]] = tail call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP1]])
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]]
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: ret i32 [[X210]]
;
%x0 = extractelement <4 x i32> %x, i32 0
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -486,8 +486,11 @@ define float @reduce_fast_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <5 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = call fast float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> [[TMP0]])
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[LOAD4]]
; CHECK-NEXT: ret float [[ADD4]]
;
entry:
Expand Down
22 changes: 0 additions & 22 deletions llvm/test/Transforms/SLPVectorizer/AArch64/scalable-type-revec.ll

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 2 x i32> %v0) {
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <vscale x 2 x i32> [[V0:%.*]], i32 0
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <vscale x 2 x i32> [[V0]], i32 1
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
; CHECK-NEXT: [[TMP0_0:%.*]] = mul i32 [[V0_0]], 2
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3_0]]
Expand Down
20 changes: 11 additions & 9 deletions llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,29 @@ define dso_local void @l(i1 %arg) local_unnamed_addr {
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP9:%.*]], [[BB25:%.*]] ]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB3:%.*]], label [[BB11:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], splat (i16 8)
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <3 x i1> <i1 poison, i1 poison, i1 undef>, <3 x i1> [[TMP10]], <3 x i32> <i32 3, i32 4, i32 2>
; CHECK-NEXT: br label [[BB25]]
; CHECK: bb11:
; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP0]], undef
; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> undef, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i8> [[TMP8]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <2 x i32> undef, [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i1> [[TMP7]], <2 x i1> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <3 x i1> <i1 poison, i1 poison, i1 undef>, <3 x i1> [[TMP11]], <3 x i32> <i32 3, i32 4, i32 2>
; CHECK-NEXT: br label [[BB25]]
; CHECK: bb25:
; CHECK-NEXT: [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i1> [ [[TMP7]], [[BB11]] ], [ [[TMP2]], [[BB3]] ]
; CHECK-NEXT: [[TMP9]] = phi <2 x i16> [ [[TMP3]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <3 x i1> [ [[TMP16]], [[BB11]] ], [ [[TMP15]], [[BB3]] ]
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.and.v3i1(<3 x i1> [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
; CHECK-NEXT: [[I33:%.*]] = and i32 [[TMP13]], undef
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP13]]
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]
; CHECK-NEXT: br i1 [[ARG]], label [[BB34:%.*]], label [[BB1]]
; CHECK: bb34:
; CHECK-NEXT: [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ]
Expand Down
Loading
Loading