[VectorCombine] Add Ext and Trunc support in foldBitOpOfCastConstant (#157822)
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Hongyu Chen (XChy) Changes: Follow-up of #155216. Full diff: https://github.com/llvm/llvm-project/pull/157822.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b1c7a2682785b..dc82c92264c07 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1006,6 +1006,9 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
// Only handle supported cast operations
switch (CastOpcode) {
case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
break;
default:
return false;
diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index c6253a7b858ad..acf39a258315f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
; Test bitwise operations with truncate and one constant
define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
-; CHECK-NEXT: ret <4 x i16> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i32> [[A1:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[T1]]
;
%t1 = trunc <4 x i32> %a to <4 x i16>
%or = or <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
@@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
; Test bitwise operations with zero extend and one constant
define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) {
; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[Z1]]
;
%z1 = zext <4 x i16> %a to <4 x i32>
%or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
@@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
; Test bitwise operations with sign extend and one constant
define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant(
-; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[S1]]
;
%s1 = sext <4 x i8> %a to <4 x i32>
%or = or <4 x i32> %s1, <i32 1, i32 2, i32 3, i32 4>
@@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant(
-; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], <i32 -1, i32 -2, i32 -3, i32 -4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 -1, i8 -2, i8 -3, i8 -4>
+; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[S1]]
;
%s1 = sext <4 x i8> %a to <4 x i32>
%or = or <4 x i32> %s1, <i32 -1, i32 -2, i32 -3, i32 -4>
@@ -356,8 +356,8 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
; Test truncate with flag preservation and one constant
define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i16>
@@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -390,8 +390,8 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
; Test zero extend with nneg flag and one constant
define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
; CHECK-LABEL: @or_zext_nneg_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i16> [[A:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i16> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i16> %a to <4 x i32>
@@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_zext_nneg_minus_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i8> %a to <4 x i32>
@@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) {
; CHECK-LABEL: @or_zext_nneg_multiconstant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i8> %a to <4 x i32>
|
Rajveer100
left a comment
There was a problem hiding this comment.
We could use splat for cases like these for better visibility:
%u.masked = and <16 x i32> %u, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>|
@Rajveer100. Do you mean replacing terms like |
Not quite; that would change the test case itself. I meant replacing only the constant vectors whose elements are all equal, which does not affect the actual values. |
|
Oh, you mean updating |
Follow-up of #155216.
This patch doesn't preserve the cast flags (e.g. `nuw`/`nsw` on `trunc`, `nneg` on `zext`); I will implement flag preservation in a follow-up patch.