[AMDGPU][True16][MC] true16 for v_div_fixup_f16 #119613
Merged
broxigarchen merged 2 commits into the base branch on Dec 18, 2024
Merged
Conversation
4f45969 to
ea6c36f
Compare
Member
|
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: Support true16 format for v_div_fixup_f16 in MC. Patch is 168.62 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/119613.diff 14 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a94d690297949..05dd8e9cf530ee 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9284,6 +9284,7 @@ static bool isRenamedInGFX9(int Opcode) {
GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32)
//
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
+ case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
case AMDGPU::V_INTERP_P2_F16:
case AMDGPU::V_MAD_F16_e64:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 8a9f8aa3d16d3a..9fd8bf2c8aa786 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -339,8 +339,7 @@ let FPDPRounding = 1 in {
} // End Predicates = [Has16BitInsts, isGFX8Only]
let SubtargetPredicate = isGFX9Plus in {
- defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
- VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
+ defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
} // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
@@ -1717,7 +1716,7 @@ defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
-defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index b649bab532f262..78d226a71025ed 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -1574,53 +1574,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
// GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_div_fixup_f16 v5, v1, v2, s3
-// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+v_div_fixup_f16 v5.l, v1.l, v2.l, s3
+// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
-v_div_fixup_f16 v5, v255, s2, s105
-// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+v_div_fixup_f16 v5.l, v255.l, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
-v_div_fixup_f16 v5, s1, v255, exec_hi
-// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
-v_div_fixup_f16 v5, s105, s105, exec_lo
-// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+v_div_fixup_f16 v5.l, s105, s105, exec_lo
+// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
-v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
-v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
-v_div_fixup_f16 v5, m0, 0.5, m0
-// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+v_div_fixup_f16 v5.l, m0, 0.5, m0
+// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
-v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
-v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
-// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
-// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
-// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
-v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
-v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
-// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
+// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_div_fixup_f16 v5.l, v255.h, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
v_div_fixup_f32 v5, v1, v2, s3
// GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index e6f868d2b40e7e..718f8ce2f21ac4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -1406,47 +1406,83 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13]
+
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f...
[truncated]
|
Member
|
@llvm/pr-subscribers-mc Author: Brox Chen (broxigarchen) Changes: Support true16 format for v_div_fixup_f16 in MC. Patch is 168.62 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/119613.diff 14 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a94d690297949..05dd8e9cf530ee 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9284,6 +9284,7 @@ static bool isRenamedInGFX9(int Opcode) {
GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32)
//
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
+ case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
case AMDGPU::V_INTERP_P2_F16:
case AMDGPU::V_MAD_F16_e64:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 8a9f8aa3d16d3a..9fd8bf2c8aa786 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -339,8 +339,7 @@ let FPDPRounding = 1 in {
} // End Predicates = [Has16BitInsts, isGFX8Only]
let SubtargetPredicate = isGFX9Plus in {
- defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
- VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
+ defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
} // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
@@ -1717,7 +1716,7 @@ defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
-defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index b649bab532f262..78d226a71025ed 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -1574,53 +1574,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
// GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_div_fixup_f16 v5, v1, v2, s3
-// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+v_div_fixup_f16 v5.l, v1.l, v2.l, s3
+// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
-v_div_fixup_f16 v5, v255, s2, s105
-// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+v_div_fixup_f16 v5.l, v255.l, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
-v_div_fixup_f16 v5, s1, v255, exec_hi
-// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
-v_div_fixup_f16 v5, s105, s105, exec_lo
-// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+v_div_fixup_f16 v5.l, s105, s105, exec_lo
+// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
-v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
-v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
-v_div_fixup_f16 v5, m0, 0.5, m0
-// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+v_div_fixup_f16 v5.l, m0, 0.5, m0
+// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
-v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
-v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
-// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
-// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
-// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
-v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
-v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
-// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
+// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_div_fixup_f16 v5.l, v255.h, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
v_div_fixup_f32 v5, v1, v2, s3
// GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index e6f868d2b40e7e..718f8ce2f21ac4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -1406,47 +1406,83 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13]
+
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f...
[truncated]
|
arsenm
approved these changes
Dec 13, 2024
Contributor
Author
|
resolved conflicts |
Sisyph
reviewed
Dec 19, 2024
| GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32) | ||
| // | ||
| case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64: | ||
| case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64: |
Contributor
There was a problem hiding this comment.
This looks slightly weird. The fake16 instruction didn't exist until gfx11. But it depends on how isRenamedInGFX9 is used. I'm not sure if it is necessary, harmless, or harmful.
This was referenced Jun 2, 2025
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Support true16 format for v_div_fixup_f16 in MC.