diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6ae492ed6b988..3dc9f503088e6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8487,7 +8487,7 @@ static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG, return DAG.getBitcast(VT, Res); } -// Lower BUILD_VECTOR operation for v8i1 and v16i1 types. +// Lower BUILD_VECTOR operation for vXi1 types. static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -8551,6 +8551,25 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, } } + // See if we can cheaply generate a vXi8 vector and convert to vXi1. + // TODO: Add handling for sub-128bit vXi8 vectors. + MVT OpVT = Op.getOperand(0).getSimpleValueType(); + if (NonConstIdx.size() > 1 && OpVT == MVT::i8) { + // On pre-BWI targets, we must extend to vXi32 instead. + MVT ByteVT = VT.changeVectorElementType(MVT::i8); + MVT WideVT = + Subtarget.hasBWI() ? ByteVT : VT.changeVectorElementType(MVT::i32); + if (DAG.getTargetLoweringInfo().isTypeLegal(ByteVT) && + DAG.getTargetLoweringInfo().isTypeLegal(WideVT)) { + SDValue ByteBV = DAG.getBuildVector(ByteVT, dl, Op->ops()); + SDValue WideBV = DAG.getNode(ISD::ANY_EXTEND, dl, WideVT, ByteBV); + WideBV = DAG.getNode(ISD::AND, dl, WideVT, WideBV, + DAG.getConstant(1, dl, WideVT)); + return DAG.getSetCC(dl, VT, WideBV, DAG.getConstant(0, dl, WideVT), + ISD::SETNE); + } + } + // insert elements one by one SDValue DstVec; if (HasConstElts) { diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll index ca547cee86db4..86fd2040b688e 100644 --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -679,230 +679,47 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: pushq %r13 ; KNL-NEXT: pushq %r12 ; KNL-NEXT: pushq %rbx +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: andl $1, %edi -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-5, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-9, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-17, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-33, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-65, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-129, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-257, %di ## imm = 0xFEFF -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-1025, %di ## imm = 0xFBFF -; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-2049, %di ## imm = 0xF7FF -; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-4097, %di ## imm = 0xEFFF -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $2, %k5, %k5 -; KNL-NEXT: korw %k5, %k0, %k5 -; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: andl $1, %esi -; KNL-NEXT: kmovw %edx, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %esi, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: kmovw %r8d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: kmovw %r9d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $5, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kandw %k3, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $4, %k4, %k4 -; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kandw %k2, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $3, %k3, %k3 -; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kandw %k1, %k2, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $2, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k0, %k1, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: vmovd %esi, %xmm2 +; KNL-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, %r8d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, %r9d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: kandw %k1, %k2, %k1 +; KNL-NEXT: kandw %k0, %k1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k2} ; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %k1, %r9d @@ -997,224 +814,46 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; SKX-NEXT: pushq %r13 ; SKX-NEXT: pushq %r12 ; SKX-NEXT: pushq %rbx -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: movq %rdi, %rax -; SKX-NEXT: kshiftld $31, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftrd $30, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $31, %k1, %k1 -; SKX-NEXT: kord %k0, %k1, %k0 -; SKX-NEXT: movl $-5, %edi -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $29, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-9, %edi -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $28, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-17, %edi -; SKX-NEXT: kmovd %edi, %k2 -; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandd %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $27, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-33, %edi -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $26, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-65, %edi -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $25, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-129, %edi -; SKX-NEXT: kmovd %edi, %k2 -; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandd %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $24, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-257, %edi ## imm = 0xFEFF -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $23, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-513, %edi ## imm = 0xFDFF -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $22, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-1025, %edi ## imm = 0xFBFF -; SKX-NEXT: kmovd %edi, %k2 -; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandd %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $21, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-2049, %edi ## imm = 0xF7FF -; SKX-NEXT: kmovd %edi, %k6 -; SKX-NEXT: kandd %k6, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $20, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-4097, %edi ## imm = 0xEFFF -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $19, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-8193, %edi ## imm = 0xDFFF -; SKX-NEXT: kmovd %edi, %k5 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandd %k5, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $18, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-16385, %edi ## imm = 0xBFFF -; SKX-NEXT: kmovd %edi, %k4 -; SKX-NEXT: kandd %k4, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $17, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: movl $-32769, %edi ## imm = 0xFFFF7FFF -; SKX-NEXT: kmovd %edi, %k3 -; SKX-NEXT: kandd %k3, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $16, %k7, %k7 -; SKX-NEXT: kord %k7, %k0, %k7 -; SKX-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF -; SKX-NEXT: kmovd %edi, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kandd %k2, %k7, %k7 -; SKX-NEXT: kshiftld $31, %k0, %k0 -; SKX-NEXT: kshiftrd $15, %k0, %k0 -; SKX-NEXT: kord %k0, %k7, %k0 -; SKX-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kmovd %edx, %k0 -; SKX-NEXT: kshiftld $31, %k0, %k0 -; SKX-NEXT: kshiftrd $30, %k0, %k0 -; SKX-NEXT: kmovd %esi, %k7 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $31, %k7, %k7 -; SKX-NEXT: kord %k0, %k7, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovd %ecx, %k7 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $29, %k7, %k7 -; SKX-NEXT: kord %k7, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovd %r8d, %k7 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $28, %k7, %k7 -; SKX-NEXT: kord %k7, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovd %r9d, %k7 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $27, %k7, %k7 -; SKX-NEXT: kord %k7, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k1 -; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $26, %k7, %k7 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kord %k7, %k1, %k1 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload -; SKX-NEXT: kandd %k7, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k0, %k0 -; SKX-NEXT: kshiftrd $25, %k0, %k0 -; SKX-NEXT: kord %k0, %k1, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $24, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $23, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $22, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $21, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandd %k6, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $20, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k6, %k1 -; SKX-NEXT: kshiftrd $19, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k5, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $18, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k4, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $17, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k3, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $16, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $15, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: vmovd %esi, %xmm0 +; SKX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpbroadcastb {{[0-9]+}}(%rsp), %xmm1 +; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm1 +; SKX-NEXT: vpbroadcastb {{[0-9]+}}(%rsp), %xmm2 +; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; SKX-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SKX-NEXT: vptestmb %ymm2, %ymm1, %k1 +; SKX-NEXT: vptestmb %ymm2, %ymm0, %k0 {%k1} ; SKX-NEXT: kshiftrd $16, %k0, %k1 ; SKX-NEXT: kmovd %k1, %edx ; SKX-NEXT: kshiftrd $1, %k0, %k1 @@ -1300,6 +939,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; SKX-NEXT: popq %r14 ; SKX-NEXT: popq %r15 ; SKX-NEXT: popq %rbp +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; KNL_X32-LABEL: test16: @@ -1308,236 +948,47 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: pushl %ebx ; KNL_X32-NEXT: pushl %edi ; KNL_X32-NEXT: pushl %esi -; KNL_X32-NEXT: subl $16, %esp -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-5, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-9, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-17, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-33, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-65, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-129, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $2, %k5, %k5 -; KNL_X32-NEXT: korw %k5, %k0, %k5 -; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k0, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $1, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $1, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw %k5, (%esp) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $14, %k5, %k5 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k5, %k6, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kandw %k7, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kandw %k4, %k5, %k4 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $5, %k5, %k5 -; KNL_X32-NEXT: korw %k5, %k4, %k4 -; KNL_X32-NEXT: kandw %k3, %k4, %k3 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $4, %k4, %k4 -; KNL_X32-NEXT: korw %k4, %k3, %k3 -; KNL_X32-NEXT: kandw %k2, %k3, %k2 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $3, %k3, %k3 -; KNL_X32-NEXT: korw %k3, %k2, %k2 -; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $2, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kandw %k0, %k1, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kmovw (%esp), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm1 +; KNL_X32-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ; KNL_X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_X32-NEXT: kmovw {{[0-9]+}}(%esp), %k2 -; KNL_X32-NEXT: kandw %k1, %k2, %k1 +; KNL_X32-NEXT: kandw %k0, %k1, %k1 +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k2 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k2} ; KNL_X32-NEXT: kmovw %k1, %ebx ; KNL_X32-NEXT: kshiftrw $1, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %ebp @@ -1616,7 +1067,6 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: orl %ebx, %ecx ; KNL_X32-NEXT: orl %edx, %ecx ; KNL_X32-NEXT: movw %cx, (%eax) -; KNL_X32-NEXT: addl $16, %esp ; KNL_X32-NEXT: popl %esi ; KNL_X32-NEXT: popl %edi ; KNL_X32-NEXT: popl %ebx @@ -1631,224 +1081,47 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; FASTISEL-NEXT: pushq %r13 ; FASTISEL-NEXT: pushq %r12 ; FASTISEL-NEXT: pushq %rbx -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; FASTISEL-NEXT: movq %rdi, %rax -; FASTISEL-NEXT: kshiftld $31, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftrd $30, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $31, %k1, %k1 -; FASTISEL-NEXT: kord %k0, %k1, %k0 -; FASTISEL-NEXT: movl $-5, %edi -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $29, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-9, %edi -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $28, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-17, %edi -; FASTISEL-NEXT: kmovd %edi, %k2 -; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandd %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $27, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-33, %edi -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $26, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-65, %edi -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $25, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-129, %edi -; FASTISEL-NEXT: kmovd %edi, %k2 -; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandd %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $24, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-257, %edi ## imm = 0xFEFF -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $23, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-513, %edi ## imm = 0xFDFF -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-1025, %edi ## imm = 0xFBFF -; FASTISEL-NEXT: kmovd %edi, %k2 -; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandd %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-2049, %edi ## imm = 0xF7FF -; FASTISEL-NEXT: kmovd %edi, %k6 -; FASTISEL-NEXT: kandd %k6, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-4097, %edi ## imm = 0xEFFF -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-8193, %edi ## imm = 0xDFFF -; FASTISEL-NEXT: kmovd %edi, %k5 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandd %k5, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-16385, %edi ## imm = 0xBFFF -; FASTISEL-NEXT: kmovd %edi, %k4 -; FASTISEL-NEXT: kandd %k4, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: movl $-32769, %edi ## imm = 0xFFFF7FFF -; FASTISEL-NEXT: kmovd %edi, %k3 -; FASTISEL-NEXT: kandd %k3, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $16, %k7, %k7 -; FASTISEL-NEXT: kord %k7, %k0, %k7 -; FASTISEL-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF -; FASTISEL-NEXT: kmovd %edi, %k2 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kandd %k2, %k7, %k7 -; FASTISEL-NEXT: kshiftld $31, %k0, %k0 -; FASTISEL-NEXT: kshiftrd $15, %k0, %k0 -; FASTISEL-NEXT: kord %k0, %k7, %k0 -; FASTISEL-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kmovd %edx, %k0 -; FASTISEL-NEXT: kshiftld $31, %k0, %k0 -; FASTISEL-NEXT: kshiftrd $30, %k0, %k0 -; FASTISEL-NEXT: kmovd %esi, %k7 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $31, %k7, %k7 -; FASTISEL-NEXT: kord %k0, %k7, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd %ecx, %k7 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $29, %k7, %k7 -; FASTISEL-NEXT: kord %k7, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd %r8d, %k7 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $28, %k7, %k7 -; FASTISEL-NEXT: kord %k7, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd %r9d, %k7 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $27, %k7, %k7 -; FASTISEL-NEXT: kord %k7, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k1 -; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $26, %k7, %k7 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kord %k7, %k1, %k1 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k7, %k1, %k1 -; FASTISEL-NEXT: kshiftld $31, %k0, %k0 -; FASTISEL-NEXT: kshiftrd $25, %k0, %k0 -; FASTISEL-NEXT: kord %k0, %k1, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $24, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $23, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandd %k6, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k6, %k1 -; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k5, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k4, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k3, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $16, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k2, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $15, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpbroadcastb {{[0-9]+}}(%rsp), %xmm1 +; FASTISEL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; FASTISEL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; FASTISEL-NEXT: vmovd %esi, %xmm2 +; FASTISEL-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $3, %r8d, %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $4, %r9d, %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpbroadcastb {{[0-9]+}}(%rsp), %xmm3 +; FASTISEL-NEXT: vptestmb %ymm1, %ymm0, %k0 +; FASTISEL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm0 +; FASTISEL-NEXT: vptestmb %ymm1, %ymm0, %k1 +; FASTISEL-NEXT: kandd %k0, %k1, %k0 ; FASTISEL-NEXT: kshiftrd $16, %k0, %k1 ; FASTISEL-NEXT: kmovd %k1, %edx ; FASTISEL-NEXT: kshiftrd $1, %k0, %k1 @@ -1934,6 +1207,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; FASTISEL-NEXT: popq %r14 ; FASTISEL-NEXT: popq %r15 ; FASTISEL-NEXT: popq %rbp +; FASTISEL-NEXT: vzeroupper ; FASTISEL-NEXT: retq %c = and <17 x i1> %a, %b ret <17 x i1> %c @@ -4165,3449 +3439,617 @@ define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) { define i128 @PR179334(<128 x i1> %m) nounwind { ; KNL-LABEL: PR179334: ; KNL: ## %bb.0: -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-5, %ax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-9, %ax -; KNL-NEXT: kmovw %eax, %k3 -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-17, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-33, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-65, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-129, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-257, %ax ## imm = 0xFEFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $2, %k5, %k5 -; KNL-NEXT: korw %k5, %k0, %k5 -; KNL-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; KNL-NEXT: kmovw %eax, %k4 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %r10d -; KNL-NEXT: andl $1, %edi -; KNL-NEXT: kmovw %esi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: kmovw %r8d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: kmovw %r9d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %ecx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %esi -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: andl $1, %edx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %edi -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: andl $1, %edx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d -; KNL-NEXT: kmovw %r8d, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %r8d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: andl $1, %edx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d -; KNL-NEXT: kmovw %r9d, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kshiftlw $1, %k5, %k5 -; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw %k5, %r9d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: andl $1, %edx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; KNL-NEXT: kmovw %r11d, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: korw %k5, %k6, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k3, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k4, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k2, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $5, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $4, %k4, %k4 -; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kandw %k0, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $3, %k3, %k3 -; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kandw %k1, %k2, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $2, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k7, %k1, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; KNL-NEXT: kmovw %edx, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vmovd %edi, %xmm1 +; KNL-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 ; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: shll $16, %r10d -; KNL-NEXT: orl %r10d, %ecx +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: shll $16, %eax -; KNL-NEXT: orl %esi, %eax +; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 ; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: shll $16, %r8d -; KNL-NEXT: orl %edi, %r8d +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %edx, %ecx +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %edx ; KNL-NEXT: shll $16, %edx -; KNL-NEXT: orl %r9d, %edx +; KNL-NEXT: orl %esi, %edx ; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %r8, %rdx +; KNL-NEXT: orq %rcx, %rdx ; KNL-NEXT: retq ; ; SKX-LABEL: PR179334: ; SKX: ## %bb.0: -; SKX-NEXT: subq $328, %rsp ## imm = 0x148 -; SKX-NEXT: kmovd %esi, %k0 -; SKX-NEXT: kshiftlq $63, %k0, %k0 -; SKX-NEXT: kshiftrq $62, %k0, %k0 -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $63, %k1, %k1 -; SKX-NEXT: korq %k0, %k1, %k0 -; SKX-NEXT: movq $-5, %rax -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovd %edx, %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $61, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-9, %rax -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovd %ecx, %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $60, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-17, %rax -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovd %r8d, %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $59, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-33, %rax -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovd %r9d, %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $58, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-65, %rax -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $57, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-129, %rax -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $56, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-257, %rax ## imm = 0xFEFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $55, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-513, %rax ## imm = 0xFDFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $54, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-1025, %rax ## imm = 0xFBFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $53, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-2049, %rax ## imm = 0xF7FF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $52, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-4097, %rax ## imm = 0xEFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $51, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-8193, %rax ## imm = 0xDFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $50, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-16385, %rax ## imm = 0xBFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $49, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-32769, %rax ## imm = 0xFFFF7FFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $48, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-65537, %rax ## imm = 0xFFFEFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $47, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-131073, %rax ## imm = 0xFFFDFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $46, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-262145, %rax ## imm = 0xFFFBFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $45, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-524289, %rax ## imm = 0xFFF7FFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $44, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-1048577, %rax ## imm = 0xFFEFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $43, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-2097153, %rax ## imm = 0xFFDFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $42, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-4194305, %rax ## imm = 0xFFBFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $41, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-8388609, %rax ## imm = 0xFF7FFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $40, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-16777217, %rax ## imm = 0xFEFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $39, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-33554433, %rax ## imm = 0xFDFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $38, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-67108865, %rax ## imm = 0xFBFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $37, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-134217729, %rax ## imm = 0xF7FFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $36, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-268435457, %rax ## imm = 0xEFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $35, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-536870913, %rax ## imm = 0xDFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $34, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movq $-1073741825, %rax ## imm = 0xBFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $33, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-2147483649, %rax ## imm = 0xFFFFFFFF7FFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $32, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-4294967297, %rax ## imm = 0xFFFFFFFEFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $31, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-8589934593, %rax ## imm = 0xFFFFFFFDFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $30, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-17179869185, %rax ## imm = 0xFFFFFFFBFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $29, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-34359738369, %rax ## imm = 0xFFFFFFF7FFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $28, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-68719476737, %rax ## imm = 0xFFFFFFEFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $27, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-137438953473, %rax ## imm = 0xFFFFFFDFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $26, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-274877906945, %rax ## imm = 0xFFFFFFBFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $25, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-549755813889, %rax ## imm = 0xFFFFFF7FFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $24, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-1099511627777, %rax ## imm = 0xFFFFFEFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $23, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-2199023255553, %rax ## imm = 0xFFFFFDFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $22, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-4398046511105, %rax ## imm = 0xFFFFFBFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, (%rsp) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $21, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-8796093022209, %rax ## imm = 0xFFFFF7FFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $20, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-17592186044417, %rax ## imm = 0xFFFFEFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $19, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-35184372088833, %rax ## imm = 0xFFFFDFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $18, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-70368744177665, %rax ## imm = 0xFFFFBFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $17, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-140737488355329, %rax ## imm = 0xFFFF7FFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $16, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-281474976710657, %rax ## imm = 0xFFFEFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $15, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-562949953421313, %rax ## imm = 0xFFFDFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $14, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-1125899906842625, %rax ## imm = 0xFFFBFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $13, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-2251799813685249, %rax ## imm = 0xFFF7FFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $12, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-4503599627370497, %rax ## imm = 0xFFEFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $11, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-9007199254740993, %rax ## imm = 0xFFDFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $10, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-18014398509481985, %rax ## imm = 0xFFBFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $9, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-36028797018963969, %rax ## imm = 0xFF7FFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $8, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-72057594037927937, %rax ## imm = 0xFEFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $7, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-144115188075855873, %rax ## imm = 0xFDFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k1 -; SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $6, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-288230376151711745, %rax ## imm = 0xFBFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k6 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k6, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $5, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-576460752303423489, %rax ## imm = 0xF7FFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k3 -; SKX-NEXT: kandq %k3, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $4, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: movabsq $-1152921504606846977, %rax ## imm = 0xEFFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k4 -; SKX-NEXT: kandq %k4, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlq $63, %k2, %k2 -; SKX-NEXT: kshiftrq $3, %k2, %k2 -; SKX-NEXT: korq %k2, %k0, %k1 -; SKX-NEXT: movabsq $-2305843009213693953, %rax ## imm = 0xDFFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k5 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kandq %k5, %k1, %k1 -; SKX-NEXT: kshiftlq $63, %k0, %k0 -; SKX-NEXT: kshiftrq $2, %k0, %k0 -; SKX-NEXT: korq %k0, %k1, %k1 -; SKX-NEXT: movabsq $-4611686018427387905, %rax ## imm = 0xBFFFFFFFFFFFFFFF -; SKX-NEXT: kmovq %rax, %k2 -; SKX-NEXT: kandq %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $62, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kshiftlq $1, %k1, %k1 -; SKX-NEXT: kshiftrq $1, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k0 -; SKX-NEXT: kmovq %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $62, %k1, %k1 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $63, %k7, %k7 -; SKX-NEXT: korq %k1, %k7, %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; SKX-NEXT: kandq %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $61, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; SKX-NEXT: kandq %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $60, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; SKX-NEXT: kandq %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $59, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; SKX-NEXT: kandq %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $58, %k7, %k7 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; SKX-NEXT: kandq %k0, %k1, %k1 -; SKX-NEXT: kshiftlq $63, %k7, %k7 -; SKX-NEXT: kshiftrq $57, %k7, %k7 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: korq %k7, %k1, %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k1, %k1 -; SKX-NEXT: kshiftlq $63, %k0, %k0 -; SKX-NEXT: kshiftrq $56, %k0, %k0 -; SKX-NEXT: korq %k0, %k1, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $55, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $54, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $53, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $52, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $51, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $50, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $49, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $48, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $47, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $46, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $45, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $44, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $43, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $42, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $41, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $40, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $39, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $38, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $37, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $36, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $35, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $34, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $33, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $32, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $31, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $30, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $29, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $28, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $27, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $26, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $25, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $24, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $23, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $22, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq (%rsp), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $21, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $20, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $19, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $18, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $17, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $16, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $15, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $14, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $13, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $12, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $11, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $10, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; SKX-NEXT: kandq %k7, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $9, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k7, %k1 -; SKX-NEXT: kshiftrq $8, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $7, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $6, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kandq %k6, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $5, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kandq %k3, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $4, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandq %k4, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: kshiftrq $3, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kandq %k5, %k0, %k0 -; SKX-NEXT: kshiftlq $63, %k3, %k1 -; SKX-NEXT: kshiftrq $2, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kandq %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $62, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kshiftlq $1, %k0, %k0 -; SKX-NEXT: kshiftrq $1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlq $63, %k1, %k1 -; SKX-NEXT: korq %k1, %k0, %k0 -; SKX-NEXT: kmovq %k0, %rdx -; SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload +; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vmovd %edi, %xmm2 +; SKX-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; SKX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 +; SKX-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1 +; SKX-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SKX-NEXT: vptestmb %zmm0, %zmm1, %k0 +; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; SKX-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; SKX-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; SKX-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; SKX-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm4, %xmm2 +; SKX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; SKX-NEXT: vptestmb %zmm0, %zmm1, %k1 +; SKX-NEXT: kmovq %k1, %rdx ; SKX-NEXT: kmovq %k0, %rax -; SKX-NEXT: addq $328, %rsp ## imm = 0x148 +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; KNL_X32-LABEL: PR179334: ; KNL_X32: ## %bb.0: -; KNL_X32-NEXT: subl $40, %esp -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-5, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: kmovw %k1, %k7 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-9, %ax -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-17, %ax -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-33, %ax -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-65, %ax -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-129, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $2, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $4, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $3, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_X32-NEXT: kmovw %ecx, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $14, %k6, %k6 -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: korw %k6, %k7, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k5, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $8, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kandw %k4, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $7, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k4, %k6, %k6 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $6, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kandw %k2, %k6, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $5, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k5, %k5 -; KNL_X32-NEXT: kandw %k3, %k5, %k4 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $4, %k5, %k5 -; KNL_X32-NEXT: korw %k5, %k4, %k4 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k2, %k4, %k3 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $3, %k4, %k4 -; KNL_X32-NEXT: korw %k4, %k3, %k3 -; KNL_X32-NEXT: kandw %k1, %k3, %k2 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $2, %k3, %k3 -; KNL_X32-NEXT: korw %k3, %k2, %k2 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $1, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $1, %k1, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL_X32-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; KNL_X32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k0 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm2, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k1 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k2 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k3 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k4 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm2, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k5 +; KNL_X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_X32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL_X32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm2, %xmm2 +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k6 +; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_X32-NEXT: vptestmd %zmm0, %zmm1, %k7 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_X32-NEXT: kmovw %k1, 14(%eax) -; KNL_X32-NEXT: kmovw %k0, 12(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; KNL_X32-NEXT: kmovw %k0, 10(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; KNL_X32-NEXT: kmovw %k0, 8(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; KNL_X32-NEXT: kmovw %k0, 6(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; KNL_X32-NEXT: kmovw %k0, 4(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; KNL_X32-NEXT: kmovw %k0, 2(%eax) -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload +; KNL_X32-NEXT: kmovw %k7, 14(%eax) +; KNL_X32-NEXT: kmovw %k6, 12(%eax) +; KNL_X32-NEXT: kmovw %k5, 10(%eax) +; KNL_X32-NEXT: kmovw %k4, 8(%eax) +; KNL_X32-NEXT: kmovw %k3, 6(%eax) +; KNL_X32-NEXT: kmovw %k2, 4(%eax) +; KNL_X32-NEXT: kmovw %k1, 2(%eax) ; KNL_X32-NEXT: kmovw %k0, (%eax) -; KNL_X32-NEXT: addl $40, %esp ; KNL_X32-NEXT: retl $4 ; ; FASTISEL-LABEL: PR179334: ; FASTISEL: ## %bb.0: -; FASTISEL-NEXT: subq $328, %rsp ## imm = 0x148 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kshiftlq $63, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftrq $62, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $63, %k1, %k1 -; FASTISEL-NEXT: korq %k0, %k1, %k0 -; FASTISEL-NEXT: movq $-5, %rax -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $61, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-9, %rax -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $60, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-17, %rax -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $59, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-33, %rax -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $58, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-65, %rax -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $57, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-129, %rax -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $56, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-257, %rax ## imm = 0xFEFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $55, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-513, %rax ## imm = 0xFDFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $54, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-1025, %rax ## imm = 0xFBFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $53, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-2049, %rax ## imm = 0xF7FF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $52, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-4097, %rax ## imm = 0xEFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $51, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-8193, %rax ## imm = 0xDFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $50, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-16385, %rax ## imm = 0xBFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $49, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-32769, %rax ## imm = 0xFFFF7FFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $48, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-65537, %rax ## imm = 0xFFFEFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $47, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-131073, %rax ## imm = 0xFFFDFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $46, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-262145, %rax ## imm = 0xFFFBFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $45, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-524289, %rax ## imm = 0xFFF7FFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $44, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-1048577, %rax ## imm = 0xFFEFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $43, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-2097153, %rax ## imm = 0xFFDFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $42, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-4194305, %rax ## imm = 0xFFBFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $41, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-8388609, %rax ## imm = 0xFF7FFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $40, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-16777217, %rax ## imm = 0xFEFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $39, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-33554433, %rax ## imm = 0xFDFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $38, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-67108865, %rax ## imm = 0xFBFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $37, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-134217729, %rax ## imm = 0xF7FFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $36, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-268435457, %rax ## imm = 0xEFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $35, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-536870913, %rax ## imm = 0xDFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $34, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movq $-1073741825, %rax ## imm = 0xBFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $33, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-2147483649, %rax ## imm = 0xFFFFFFFF7FFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $32, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-4294967297, %rax ## imm = 0xFFFFFFFEFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $31, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-8589934593, %rax ## imm = 0xFFFFFFFDFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $30, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-17179869185, %rax ## imm = 0xFFFFFFFBFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $29, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-34359738369, %rax ## imm = 0xFFFFFFF7FFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $28, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-68719476737, %rax ## imm = 0xFFFFFFEFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $27, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-137438953473, %rax ## imm = 0xFFFFFFDFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $26, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-274877906945, %rax ## imm = 0xFFFFFFBFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $25, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-549755813889, %rax ## imm = 0xFFFFFF7FFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $24, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-1099511627777, %rax ## imm = 0xFFFFFEFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $23, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-2199023255553, %rax ## imm = 0xFFFFFDFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $22, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-4398046511105, %rax ## imm = 0xFFFFFBFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, (%rsp) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $21, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-8796093022209, %rax ## imm = 0xFFFFF7FFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $20, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-17592186044417, %rax ## imm = 0xFFFFEFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $19, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-35184372088833, %rax ## imm = 0xFFFFDFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $18, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-70368744177665, %rax ## imm = 0xFFFFBFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $17, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-140737488355329, %rax ## imm = 0xFFFF7FFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $16, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-281474976710657, %rax ## imm = 0xFFFEFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $15, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-562949953421313, %rax ## imm = 0xFFFDFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $14, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-1125899906842625, %rax ## imm = 0xFFFBFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $13, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-2251799813685249, %rax ## imm = 0xFFF7FFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $12, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-4503599627370497, %rax ## imm = 0xFFEFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $11, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-9007199254740993, %rax ## imm = 0xFFDFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $10, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-18014398509481985, %rax ## imm = 0xFFBFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $9, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-36028797018963969, %rax ## imm = 0xFF7FFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $8, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-72057594037927937, %rax ## imm = 0xFEFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $7, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-144115188075855873, %rax ## imm = 0xFDFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k1 -; FASTISEL-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $6, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-288230376151711745, %rax ## imm = 0xFBFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k6 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k6, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $5, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-576460752303423489, %rax ## imm = 0xF7FFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k3 -; FASTISEL-NEXT: kandq %k3, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $4, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: movabsq $-1152921504606846977, %rax ## imm = 0xEFFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k4 -; FASTISEL-NEXT: kandq %k4, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kshiftlq $63, %k2, %k2 -; FASTISEL-NEXT: kshiftrq $3, %k2, %k2 -; FASTISEL-NEXT: korq %k2, %k0, %k1 -; FASTISEL-NEXT: movabsq $-2305843009213693953, %rax ## imm = 0xDFFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k5 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kandq %k5, %k1, %k1 -; FASTISEL-NEXT: kshiftlq $63, %k0, %k0 -; FASTISEL-NEXT: kshiftrq $2, %k0, %k0 -; FASTISEL-NEXT: korq %k0, %k1, %k1 -; FASTISEL-NEXT: movabsq $-4611686018427387905, %rax ## imm = 0xBFFFFFFFFFFFFFFF -; FASTISEL-NEXT: kmovq %rax, %k2 -; FASTISEL-NEXT: kandq %k2, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: kshiftlq $62, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kshiftlq $1, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $1, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k0 -; FASTISEL-NEXT: kmovq %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; FASTISEL-NEXT: kmovd %esi, %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $62, %k1, %k1 -; FASTISEL-NEXT: kmovd %edi, %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $63, %k7, %k7 -; FASTISEL-NEXT: korq %k1, %k7, %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k0, %k1, %k1 -; FASTISEL-NEXT: kmovd %edx, %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $61, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k0, %k1, %k1 -; FASTISEL-NEXT: kmovd %ecx, %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $60, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k0, %k1, %k1 -; FASTISEL-NEXT: kmovd %r8d, %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $59, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k0, %k1, %k1 -; FASTISEL-NEXT: kmovd %r9d, %k7 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $58, %k7, %k7 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k0, %k1, %k1 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k7 -; FASTISEL-NEXT: kshiftrq $57, %k7, %k7 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: korq %k7, %k1, %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k1, %k1 -; FASTISEL-NEXT: kshiftlq $63, %k0, %k0 -; FASTISEL-NEXT: kshiftrq $56, %k0, %k0 -; FASTISEL-NEXT: korq %k0, %k1, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $55, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $54, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $53, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $52, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $51, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $50, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $49, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $48, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $47, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $46, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $45, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $44, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $43, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $42, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $41, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $40, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $39, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $38, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $37, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $36, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $35, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $34, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $33, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $32, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $31, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $30, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $29, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $28, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $27, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $26, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $25, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $24, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $23, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $22, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq (%rsp), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $21, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $20, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $19, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $18, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $17, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $16, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $15, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $14, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $13, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $12, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $11, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $10, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $9, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k7, %k1 -; FASTISEL-NEXT: kshiftrq $8, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $7, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; FASTISEL-NEXT: kandq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $6, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kandq %k6, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $5, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kandq %k3, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $4, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandq %k4, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: kshiftrq $3, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kandq %k5, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $63, %k3, %k1 -; FASTISEL-NEXT: kshiftrq $2, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kandq %k2, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $62, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlq $1, %k0, %k0 -; FASTISEL-NEXT: kshiftrq $1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlq $63, %k1, %k1 -; FASTISEL-NEXT: korq %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq %k0, %rax -; FASTISEL-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload +; FASTISEL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; FASTISEL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1 +; FASTISEL-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; FASTISEL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vptestmb %zmm0, %zmm1, %k0 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm1 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; FASTISEL-NEXT: vmovd %edi, %xmm3 +; FASTISEL-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $4, %r8d, %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $5, %r9d, %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; FASTISEL-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; FASTISEL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; FASTISEL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; FASTISEL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm4, %xmm2 +; FASTISEL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; FASTISEL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; FASTISEL-NEXT: vptestmb %zmm0, %zmm1, %k1 +; FASTISEL-NEXT: kmovq %k1, %rax ; FASTISEL-NEXT: kmovq %k0, %rdx -; FASTISEL-NEXT: addq $328, %rsp ## imm = 0x148 +; FASTISEL-NEXT: vzeroupper ; FASTISEL-NEXT: retq %t = bitcast <128 x i1> %m to i128 ret i128 %t diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 2a2c95d85c3f7..0e0a9f6546529 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1887,444 +1887,89 @@ define void @extload_v8i64(ptr %a, ptr %res) { define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: # %bb.0: -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-5, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k2 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-9, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k7 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-17, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-33, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-65, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k3 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-129, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k4 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: andl $1, %edi -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: korw %k1, %k6, %k1 -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: kmovw %edx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw %k7, %k0 -; KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k7, %k1, %k1 -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: kmovw %r8d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k1, %k1 -; KNL-NEXT: kmovw %r9d, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: korw %k1, %k6, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k7, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $14, %k6, %k6 -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: korw %k6, %k7, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k3, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k4, %k6, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kandw %k0, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $4, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kandw %k2, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $3, %k4, %k4 -; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $2, %k3, %k3 -; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k2, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k2 -; KNL-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1 -; KNL-NEXT: vpternlogd {{.*#+}} zmm3 {%k1} {z} = -1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: vpternlogd {{.*#+}} zmm4 {%k1} {z} = -1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: vpternlogd {{.*#+}} zmm5 {%k1} {z} = -1 +; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; KNL-NEXT: vptestmd %zmm3, %zmm2, %k1 +; KNL-NEXT: vmovd %edi, %xmm2 +; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm3, %zmm2, %k2 +; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm3, %zmm2, %k3 +; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm3, %zmm2, %k4 +; KNL-NEXT: vpternlogd {{.*#+}} zmm2 {%k4} {z} = -1 ; KNL-NEXT: vpmovdw %zmm2, %ymm2 +; KNL-NEXT: vpternlogd {{.*#+}} zmm3 {%k3} {z} = -1 ; KNL-NEXT: vpmovdw %zmm3, %ymm3 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 ; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1 -; KNL-NEXT: vpmovdw %zmm4, %ymm2 -; KNL-NEXT: vpmovdw %zmm5, %ymm3 +; KNL-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1 +; KNL-NEXT: vpmovdw %zmm2, %ymm2 +; KNL-NEXT: vpternlogd {{.*#+}} zmm3 {%k1} {z} = -1 +; KNL-NEXT: vpmovdw %zmm3, %ymm3 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 ; KNL-NEXT: vpandq %zmm0, %zmm2, %zmm0 ; KNL-NEXT: retq @@ -2340,444 +1985,89 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; ; AVX512DQNOBW-LABEL: test21: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: andl $1, %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-5, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k2 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-9, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k7 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-17, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-33, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-65, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k3 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-129, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k4 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k5 -; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k1 -; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF -; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k0, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k0 -; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: andl $1, %edi -; AVX512DQNOBW-NEXT: kmovw %esi, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %edi, %k6 -; AVX512DQNOBW-NEXT: korw %k1, %k6, %k1 -; AVX512DQNOBW-NEXT: kandw %k2, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %edx, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %k7, %k0 -; AVX512DQNOBW-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k7, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k2, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %r8d, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %r9d, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k3, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k3, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k4, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k5, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k4, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: andl $1, %eax -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; AVX512DQNOBW-NEXT: kmovw %ecx, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: korw %k1, %k6, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k0, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k2, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k7, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k0, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k3, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k3, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k5, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kandw %k4, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k0, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k2, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: andl $1, %eax -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 -; AVX512DQNOBW-NEXT: kandw %k0, %k5, %k4 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k5 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 -; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 -; AVX512DQNOBW-NEXT: kandw %k2, %k4, %k3 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k4 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 -; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k0, %k3, %k2 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k3 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3 -; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k0, %k2, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm2 -; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm3 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5 +; AVX512DQNOBW-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512DQNOBW-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512DQNOBW-NEXT: vptestmd %zmm3, %zmm2, %k0 +; AVX512DQNOBW-NEXT: vmovd %edi, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vptestmd %zmm3, %zmm2, %k1 +; AVX512DQNOBW-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512DQNOBW-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vptestmd %zmm3, %zmm2, %k2 +; AVX512DQNOBW-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512DQNOBW-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vptestmd %zmm3, %zmm2, %k3 +; AVX512DQNOBW-NEXT: vpmovm2d %k3, %zmm2 ; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512DQNOBW-NEXT: vpmovm2d %k2, %zmm3 ; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 ; AVX512DQNOBW-NEXT: vpandq %zmm1, %zmm2, %zmm1 -; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm2 -; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm3 +; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm2 +; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm3 +; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm2, %zmm0 ; AVX512DQNOBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll index 611e3a1315d67..52cffd38def78 100644 --- a/llvm/test/CodeGen/X86/avx512-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-load-store.ll @@ -374,1101 +374,215 @@ define <80 x i32> @test_maskz_load_v80i32(ptr %p, <80 x i1> %mask) nounwind { ; CHECK64-LABEL: test_maskz_load_v80i32: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movq %rdi, %rax -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: andl $1, %edi -; CHECK64-NEXT: kmovw %edi, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $14, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-5, %di -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: kmovw %k1, %k7 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $13, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-9, %di -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $12, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-17, %di -; CHECK64-NEXT: kmovw %edi, %k4 -; CHECK64-NEXT: kandw %k4, %k0, %k0 -; CHECK64-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $11, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-33, %di -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $10, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-65, %di -; CHECK64-NEXT: kmovw %edi, %k3 -; CHECK64-NEXT: kandw %k3, %k0, %k0 -; CHECK64-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $9, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-129, %di -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: kmovw %k1, %k2 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $8, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-257, %di # imm = 0xFEFF -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $7, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-513, %di # imm = 0xFDFF -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $6, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-1025, %di # imm = 0xFBFF -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $5, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-2049, %di # imm = 0xF7FF -; CHECK64-NEXT: kmovw %edi, %k5 -; CHECK64-NEXT: kandw %k5, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $4, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-4097, %di # imm = 0xEFFF -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $3, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k0 -; CHECK64-NEXT: movw $-8193, %di # imm = 0xDFFF -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $2, %k1, %k1 -; CHECK64-NEXT: korw %k1, %k0, %k1 -; CHECK64-NEXT: movw $-16385, %di # imm = 0xBFFF -; CHECK64-NEXT: kmovw %edi, %k0 -; CHECK64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $14, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kshiftlw $1, %k1, %k1 -; CHECK64-NEXT: kshiftrw $1, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: andl $1, %edi -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK64-NEXT: kmovw %r10d, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $14, %k1, %k1 -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: korw %k1, %k6, %k1 -; CHECK64-NEXT: kandw %k7, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $13, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; CHECK64-NEXT: kandw %k7, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $12, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k4, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $11, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $10, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k3, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $9, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k2, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $8, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $7, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; CHECK64-NEXT: kandw %k4, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $6, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; CHECK64-NEXT: kandw %k2, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $5, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: kandw %k5, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $4, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $3, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $2, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $14, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kshiftlw $1, %k1, %k1 -; CHECK64-NEXT: kshiftrw $1, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: andl $1, %edi -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK64-NEXT: kmovw %r10d, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $14, %k1, %k1 -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: korw %k1, %k6, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $13, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k7, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $12, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; CHECK64-NEXT: kandw %k7, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $11, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $10, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $9, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $8, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k3, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $7, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k4, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $6, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k2, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $5, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k5, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $4, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; CHECK64-NEXT: kandw %k5, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $3, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; CHECK64-NEXT: kandw %k2, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $2, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $14, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kshiftlw $1, %k1, %k1 -; CHECK64-NEXT: kshiftrw $1, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: andl $1, %edi -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK64-NEXT: kmovw %r10d, %k1 -; CHECK64-NEXT: kshiftlw $15, %k1, %k1 -; CHECK64-NEXT: kshiftrw $14, %k1, %k1 -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: korw %k1, %k6, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; CHECK64-NEXT: kandw %k4, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $13, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $12, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k7, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $11, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $10, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $9, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $8, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $7, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $6, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $5, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; CHECK64-NEXT: kandw %k6, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $4, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k5, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $3, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k2, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $2, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kandw %k3, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $14, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: kshiftlw $1, %k1, %k1 -; CHECK64-NEXT: kshiftrw $1, %k1, %k1 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK64-NEXT: kmovw %edi, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k1, %k1 -; CHECK64-NEXT: andl $1, %edx -; CHECK64-NEXT: kmovw %ecx, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $14, %k6, %k6 -; CHECK64-NEXT: kmovw %edx, %k7 -; CHECK64-NEXT: korw %k6, %k7, %k6 -; CHECK64-NEXT: kandw %k4, %k6, %k6 -; CHECK64-NEXT: kmovw %r8d, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $13, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k6, %k6 -; CHECK64-NEXT: kmovw %r9d, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $12, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $11, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $10, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; CHECK64-NEXT: kandw %k3, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $9, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kandw %k0, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $8, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $7, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k6, %k6 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k7 -; CHECK64-NEXT: kshiftlw $15, %k7, %k7 -; CHECK64-NEXT: kshiftrw $6, %k7, %k7 -; CHECK64-NEXT: korw %k7, %k6, %k6 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k6, %k5 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k6 -; CHECK64-NEXT: kshiftlw $15, %k6, %k6 -; CHECK64-NEXT: kshiftrw $5, %k6, %k6 -; CHECK64-NEXT: korw %k6, %k5, %k5 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k5, %k4 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k5 -; CHECK64-NEXT: kshiftlw $15, %k5, %k5 -; CHECK64-NEXT: kshiftrw $4, %k5, %k5 -; CHECK64-NEXT: korw %k5, %k4, %k4 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k4, %k3 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k4 -; CHECK64-NEXT: kshiftlw $15, %k4, %k4 -; CHECK64-NEXT: kshiftrw $3, %k4, %k4 -; CHECK64-NEXT: korw %k4, %k3, %k3 -; CHECK64-NEXT: kandw %k2, %k3, %k2 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k3 -; CHECK64-NEXT: kshiftlw $15, %k3, %k3 -; CHECK64-NEXT: kshiftrw $2, %k3, %k3 -; CHECK64-NEXT: korw %k3, %k2, %k2 -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; CHECK64-NEXT: kandw %k0, %k2, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k2 -; CHECK64-NEXT: kshiftlw $14, %k2, %k2 -; CHECK64-NEXT: korw %k2, %k0, %k0 -; CHECK64-NEXT: kshiftlw $1, %k0, %k0 -; CHECK64-NEXT: kshiftrw $1, %k0, %k0 -; CHECK64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK64-NEXT: kmovw %ecx, %k2 -; CHECK64-NEXT: kshiftlw $15, %k2, %k2 -; CHECK64-NEXT: korw %k2, %k0, %k2 -; CHECK64-NEXT: vmovdqu32 (%rsi), %zmm0 {%k2} {z} -; CHECK64-NEXT: vmovdqu32 64(%rsi), %zmm1 {%k1} {z} -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; CHECK64-NEXT: vmovdqu32 128(%rsi), %zmm2 {%k1} {z} -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; CHECK64-NEXT: vmovdqu32 192(%rsi), %zmm3 {%k1} {z} -; CHECK64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; CHECK64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK64-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK64-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK64-NEXT: vptestmd %zmm0, %zmm1, %k1 +; CHECK64-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK64-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK64-NEXT: vptestmd %zmm0, %zmm1, %k2 +; CHECK64-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK64-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK64-NEXT: vptestmd %zmm0, %zmm1, %k3 +; CHECK64-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK64-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK64-NEXT: vptestmd %zmm0, %zmm1, %k4 +; CHECK64-NEXT: vmovd %edx, %xmm1 +; CHECK64-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $2, %r8d, %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $3, %r9d, %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK64-NEXT: vptestmd %zmm0, %zmm1, %k5 +; CHECK64-NEXT: vmovdqu32 (%rsi), %zmm0 {%k5} {z} +; CHECK64-NEXT: vmovdqu32 64(%rsi), %zmm1 {%k4} {z} +; CHECK64-NEXT: vmovdqu32 128(%rsi), %zmm2 {%k3} {z} +; CHECK64-NEXT: vmovdqu32 192(%rsi), %zmm3 {%k2} {z} ; CHECK64-NEXT: vmovdqu32 256(%rsi), %zmm4 {%k1} {z} -; CHECK64-NEXT: vmovdqa64 %zmm4, 256(%rax) -; CHECK64-NEXT: vmovdqa64 %zmm3, 192(%rax) -; CHECK64-NEXT: vmovdqa64 %zmm2, 128(%rax) -; CHECK64-NEXT: vmovdqa64 %zmm1, 64(%rax) -; CHECK64-NEXT: vmovdqa64 %zmm0, (%rax) +; CHECK64-NEXT: vmovdqa64 %zmm4, 256(%rdi) +; CHECK64-NEXT: vmovdqa64 %zmm3, 192(%rdi) +; CHECK64-NEXT: vmovdqa64 %zmm2, 128(%rdi) +; CHECK64-NEXT: vmovdqa64 %zmm1, 64(%rdi) +; CHECK64-NEXT: vmovdqa64 %zmm0, (%rdi) ; CHECK64-NEXT: vzeroupper ; CHECK64-NEXT: retq ; ; CHECK32-LABEL: test_maskz_load_v80i32: ; CHECK32: # %bb.0: -; CHECK32-NEXT: subl $32, %esp -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: kmovw %eax, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $14, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-5, %ax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: kmovw %k1, %k7 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $13, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-9, %ax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $12, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-17, %ax -; CHECK32-NEXT: kmovw %eax, %k4 -; CHECK32-NEXT: kandw %k4, %k0, %k0 -; CHECK32-NEXT: kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $11, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-33, %ax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $10, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-65, %ax -; CHECK32-NEXT: kmovw %eax, %k3 -; CHECK32-NEXT: kandw %k3, %k0, %k0 -; CHECK32-NEXT: kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $9, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-129, %ax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: kmovw %k1, %k2 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $8, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-257, %ax # imm = 0xFEFF -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $7, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-513, %ax # imm = 0xFDFF -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $6, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-1025, %ax # imm = 0xFBFF -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $5, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-2049, %ax # imm = 0xF7FF -; CHECK32-NEXT: kmovw %eax, %k5 -; CHECK32-NEXT: kandw %k5, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $4, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-4097, %ax # imm = 0xEFFF -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $3, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k0 -; CHECK32-NEXT: movw $-8193, %ax # imm = 0xDFFF -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $2, %k1, %k1 -; CHECK32-NEXT: korw %k1, %k0, %k1 -; CHECK32-NEXT: movw $-16385, %ax # imm = 0xBFFF -; CHECK32-NEXT: kmovw %eax, %k0 -; CHECK32-NEXT: kmovw %k0, (%esp) # 2-byte Spill -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $14, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kshiftlw $1, %k1, %k1 -; CHECK32-NEXT: kshiftrw $1, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: kmovw %ecx, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $14, %k1, %k1 -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: korw %k1, %k6, %k1 -; CHECK32-NEXT: kandw %k7, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $13, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 # 2-byte Reload -; CHECK32-NEXT: kandw %k7, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $12, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k4, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $11, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $10, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k3, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $9, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k2, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $8, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $7, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 # 2-byte Reload -; CHECK32-NEXT: kandw %k4, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $6, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 2-byte Reload -; CHECK32-NEXT: kandw %k2, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $5, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: kandw %k5, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $4, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $3, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $2, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw (%esp), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $14, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kshiftlw $1, %k1, %k1 -; CHECK32-NEXT: kshiftrw $1, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: kmovw %ecx, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $14, %k1, %k1 -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: korw %k1, %k6, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $13, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k7, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $12, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 # 2-byte Reload -; CHECK32-NEXT: kandw %k7, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $11, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $10, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $9, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $8, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k3, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $7, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k4, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $6, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k2, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $5, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k5, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $4, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 # 2-byte Reload -; CHECK32-NEXT: kandw %k5, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $3, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 2-byte Reload -; CHECK32-NEXT: kandw %k2, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $2, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw (%esp), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $14, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kshiftlw $1, %k1, %k1 -; CHECK32-NEXT: kshiftrw $1, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: kmovw %ecx, %k1 -; CHECK32-NEXT: kshiftlw $15, %k1, %k1 -; CHECK32-NEXT: kshiftrw $14, %k1, %k1 -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: korw %k1, %k6, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 # 2-byte Reload -; CHECK32-NEXT: kandw %k4, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $13, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $12, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k7, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $11, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $10, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $9, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $8, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $7, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $6, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $5, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 # 2-byte Reload -; CHECK32-NEXT: kandw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $4, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k5, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $3, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k2, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $2, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kandw %k3, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $14, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: kshiftlw $1, %k1, %k1 -; CHECK32-NEXT: kshiftrw $1, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k1, %k1 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: kmovw %ecx, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $14, %k6, %k6 -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: korw %k6, %k7, %k6 -; CHECK32-NEXT: kandw %k4, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $13, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $12, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $11, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $10, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; CHECK32-NEXT: kandw %k3, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $9, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kandw %k0, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $8, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $7, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k6, %k6 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k7 -; CHECK32-NEXT: kshiftlw $15, %k7, %k7 -; CHECK32-NEXT: kshiftrw $6, %k7, %k7 -; CHECK32-NEXT: korw %k7, %k6, %k6 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k6, %k5 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k6 -; CHECK32-NEXT: kshiftlw $15, %k6, %k6 -; CHECK32-NEXT: kshiftrw $5, %k6, %k6 -; CHECK32-NEXT: korw %k6, %k5, %k5 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k5, %k4 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k5 -; CHECK32-NEXT: kshiftlw $15, %k5, %k5 -; CHECK32-NEXT: kshiftrw $4, %k5, %k5 -; CHECK32-NEXT: korw %k5, %k4, %k4 -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k4, %k3 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k4 -; CHECK32-NEXT: kshiftlw $15, %k4, %k4 -; CHECK32-NEXT: kshiftrw $3, %k4, %k4 -; CHECK32-NEXT: korw %k4, %k3, %k3 -; CHECK32-NEXT: kandw %k2, %k3, %k2 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k3 -; CHECK32-NEXT: kshiftlw $15, %k3, %k3 -; CHECK32-NEXT: kshiftrw $2, %k3, %k3 -; CHECK32-NEXT: korw %k3, %k2, %k2 -; CHECK32-NEXT: kmovw (%esp), %k0 # 2-byte Reload -; CHECK32-NEXT: kandw %k0, %k2, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k2 -; CHECK32-NEXT: kshiftlw $14, %k2, %k2 -; CHECK32-NEXT: korw %k2, %k0, %k0 -; CHECK32-NEXT: kshiftlw $1, %k0, %k0 -; CHECK32-NEXT: kshiftrw $1, %k0, %k0 -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: kmovw %eax, %k2 -; CHECK32-NEXT: kshiftlw $15, %k2, %k2 -; CHECK32-NEXT: korw %k2, %k0, %k2 -; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: vmovdqu32 (%eax), %zmm0 {%k2} {z} -; CHECK32-NEXT: vmovdqu32 64(%eax), %zmm1 {%k1} {z} -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload -; CHECK32-NEXT: vmovdqu32 128(%eax), %zmm2 {%k1} {z} -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload -; CHECK32-NEXT: vmovdqu32 192(%eax), %zmm3 {%k1} {z} -; CHECK32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload -; CHECK32-NEXT: vmovdqu32 256(%eax), %zmm4 {%k1} {z} +; CHECK32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; CHECK32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK32-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK32-NEXT: vptestmd %zmm0, %zmm1, %k1 +; CHECK32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK32-NEXT: vptestmd %zmm0, %zmm1, %k2 +; CHECK32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK32-NEXT: vptestmd %zmm0, %zmm1, %k3 +; CHECK32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK32-NEXT: vptestmd %zmm0, %zmm1, %k4 +; CHECK32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $5, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $6, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $7, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $9, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $10, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $11, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $12, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $13, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $14, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpinsrb $15, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; CHECK32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; CHECK32-NEXT: vptestmd %zmm0, %zmm1, %k5 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: vmovdqu32 (%ecx), %zmm0 {%k5} {z} +; CHECK32-NEXT: vmovdqu32 64(%ecx), %zmm1 {%k4} {z} +; CHECK32-NEXT: vmovdqu32 128(%ecx), %zmm2 {%k3} {z} +; CHECK32-NEXT: vmovdqu32 192(%ecx), %zmm3 {%k2} {z} +; CHECK32-NEXT: vmovdqu32 256(%ecx), %zmm4 {%k1} {z} ; CHECK32-NEXT: vmovdqa64 %zmm4, 256(%eax) ; CHECK32-NEXT: vmovdqa64 %zmm3, 192(%eax) ; CHECK32-NEXT: vmovdqa64 %zmm2, 128(%eax) ; CHECK32-NEXT: vmovdqa64 %zmm1, 64(%eax) ; CHECK32-NEXT: vmovdqa64 %zmm0, (%eax) -; CHECK32-NEXT: addl $32, %esp ; CHECK32-NEXT: vzeroupper ; CHECK32-NEXT: retl $4 %r = call <80 x i32> @llvm.masked.load.v80i32.p0(ptr %p, <80 x i1> %mask, <80 x i32> zeroinitializer) diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 735d85ffee064..4bd67a05d9c7d 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2895,440 +2895,84 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: andl $1, %esi -; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kmovw %edx, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-5, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k7 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-9, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %r8d, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-17, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %r9d, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-33, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k2 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-65, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-129, %ax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k3 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-257, %ax ## imm = 0xFEFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k4 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $8, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $7, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $6, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $4, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $3, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $14, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: kmovw %ecx, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $14, %k6, %k6 -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: korw %k6, %k7, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k3, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k4, %k6, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $5, %k6, %k6 -; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: kandw %k3, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $4, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kandw %k1, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $3, %k4, %k4 -; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kandw %k2, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $2, %k3, %k3 -; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k2, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kmovw %k1, 6(%rdi) -; KNL-NEXT: kmovw %k0, 4(%rdi) -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: kmovw %k0, 2(%rdi) -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload +; KNL-NEXT: vmovd %esi, %xmm0 +; KNL-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0 +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k1 +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k2 +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vptestmd %zmm1, %zmm0, %k3 +; KNL-NEXT: kmovw %k3, 6(%rdi) +; KNL-NEXT: kmovw %k2, 4(%rdi) +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_64i1: @@ -3349,440 +2993,84 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: andl $1, %esi -; AVX512DQ-NEXT: kmovw %esi, %k0 -; AVX512DQ-NEXT: kmovw %edx, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-5, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k7 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kmovw %ecx, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-9, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %r8d, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-17, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %r9d, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-33, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k2 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-65, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-129, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k3 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-257, %ax ## imm = 0xFEFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-513, %ax ## imm = 0xFDFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k4 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; AVX512DQ-NEXT: kmovw %eax, %k5 -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; AVX512DQ-NEXT: kmovw %ecx, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: korw %k0, %k6, %k0 -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; AVX512DQ-NEXT: kmovw %ecx, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: korw %k0, %k6, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; AVX512DQ-NEXT: kmovw %ecx, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6 -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: korw %k6, %k7, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kandw %k3, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kandw %k4, %k6, %k5 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 -; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 -; AVX512DQ-NEXT: korw %k6, %k5, %k5 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k3, %k5, %k4 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k5 -; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 -; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5 -; AVX512DQ-NEXT: korw %k5, %k4, %k4 -; AVX512DQ-NEXT: kandw %k1, %k4, %k3 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k4 -; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 -; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4 -; AVX512DQ-NEXT: korw %k4, %k3, %k3 -; AVX512DQ-NEXT: kandw %k2, %k3, %k2 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k3 -; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3 -; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3 -; AVX512DQ-NEXT: korw %k3, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k2, %k1 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, 6(%rdi) -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload +; AVX512DQ-NEXT: vmovd %esi, %xmm0 +; AVX512DQ-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm0, %k1 +; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm0, %k2 +; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm0, %k3 +; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_64i1: diff --git a/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll b/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll index 0a5ad6fc2b0fb..e92c9e66d0fa5 100644 --- a/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll +++ b/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll @@ -183,1446 +183,215 @@ declare void @llvm.masked.store.v16f16.p0(<16 x half>, ptr, i32, <16 x i1>) define void @test_maskz_store_v192i8(ptr %p0, <192 x i1> %mask) nounwind { ; CHECK-LABEL: test_maskz_store_v192i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: subq $376, %rsp ## imm = 0x178 -; CHECK-NEXT: kmovd %edx, %k0 -; CHECK-NEXT: kshiftlq $63, %k0, %k0 -; CHECK-NEXT: kshiftrq $62, %k0, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $63, %k1, %k1 -; CHECK-NEXT: korq %k0, %k1, %k0 -; CHECK-NEXT: movq $-5, %rax -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: kmovd %ecx, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $61, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-9, %rax -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: kmovd %r8d, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $60, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-17, %rax -; CHECK-NEXT: kmovq %rax, %k2 -; CHECK-NEXT: kandq %k2, %k0, %k0 -; CHECK-NEXT: kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kmovd %r9d, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $59, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-33, %rax -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $58, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-65, %rax -; CHECK-NEXT: kmovq %rax, %k3 -; CHECK-NEXT: kandq %k3, %k0, %k0 -; CHECK-NEXT: kmovq %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $57, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-129, %rax -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $56, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-257, %rax ## imm = 0xFEFF -; CHECK-NEXT: kmovq %rax, %k4 -; CHECK-NEXT: kandq %k4, %k0, %k0 -; CHECK-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $55, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-513, %rax ## imm = 0xFDFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $54, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-1025, %rax ## imm = 0xFBFF -; CHECK-NEXT: kmovq %rax, %k5 -; CHECK-NEXT: kandq %k5, %k0, %k0 -; CHECK-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $53, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-2049, %rax ## imm = 0xF7FF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $52, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-4097, %rax ## imm = 0xEFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $51, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-8193, %rax ## imm = 0xDFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $50, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-16385, %rax ## imm = 0xBFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $49, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-32769, %rax ## imm = 0xFFFF7FFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $48, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-65537, %rax ## imm = 0xFFFEFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $47, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-131073, %rax ## imm = 0xFFFDFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $46, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-262145, %rax ## imm = 0xFFFBFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $45, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-524289, %rax ## imm = 0xFFF7FFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $44, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-1048577, %rax ## imm = 0xFFEFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $43, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-2097153, %rax ## imm = 0xFFDFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $42, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-4194305, %rax ## imm = 0xFFBFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $41, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-8388609, %rax ## imm = 0xFF7FFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $40, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-16777217, %rax ## imm = 0xFEFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $39, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-33554433, %rax ## imm = 0xFDFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $38, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-67108865, %rax ## imm = 0xFBFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $37, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-134217729, %rax ## imm = 0xF7FFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $36, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-268435457, %rax ## imm = 0xEFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $35, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-536870913, %rax ## imm = 0xDFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $34, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movq $-1073741825, %rax ## imm = 0xBFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $33, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-2147483649, %rax ## imm = 0xFFFFFFFF7FFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $32, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-4294967297, %rax ## imm = 0xFFFFFFFEFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $31, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-8589934593, %rax ## imm = 0xFFFFFFFDFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $30, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-17179869185, %rax ## imm = 0xFFFFFFFBFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $29, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-34359738369, %rax ## imm = 0xFFFFFFF7FFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $28, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-68719476737, %rax ## imm = 0xFFFFFFEFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $27, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-137438953473, %rax ## imm = 0xFFFFFFDFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $26, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-274877906945, %rax ## imm = 0xFFFFFFBFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $25, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-549755813889, %rax ## imm = 0xFFFFFF7FFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $24, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-1099511627777, %rax ## imm = 0xFFFFFEFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $23, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-2199023255553, %rax ## imm = 0xFFFFFDFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $22, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-4398046511105, %rax ## imm = 0xFFFFFBFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $21, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-8796093022209, %rax ## imm = 0xFFFFF7FFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $20, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-17592186044417, %rax ## imm = 0xFFFFEFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $19, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-35184372088833, %rax ## imm = 0xFFFFDFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $18, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-70368744177665, %rax ## imm = 0xFFFFBFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, (%rsp) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $17, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-140737488355329, %rax ## imm = 0xFFFF7FFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $16, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-281474976710657, %rax ## imm = 0xFFFEFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $15, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-562949953421313, %rax ## imm = 0xFFFDFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $14, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-1125899906842625, %rax ## imm = 0xFFFBFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $13, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-2251799813685249, %rax ## imm = 0xFFF7FFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $12, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-4503599627370497, %rax ## imm = 0xFFEFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $11, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-9007199254740993, %rax ## imm = 0xFFDFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $10, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-18014398509481985, %rax ## imm = 0xFFBFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $9, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-36028797018963969, %rax ## imm = 0xFF7FFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $8, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-72057594037927937, %rax ## imm = 0xFEFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $7, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-144115188075855873, %rax ## imm = 0xFDFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $6, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-288230376151711745, %rax ## imm = 0xFBFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $5, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-576460752303423489, %rax ## imm = 0xF7FFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $4, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-1152921504606846977, %rax ## imm = 0xEFFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: kshiftrq $3, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: movabsq $-2305843009213693953, %rax ## imm = 0xDFFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $2, %k7, %k7 -; CHECK-NEXT: korq %k7, %k0, %k7 -; CHECK-NEXT: movabsq $-4611686018427387905, %rax ## imm = 0xBFFFFFFFFFFFFFFF -; CHECK-NEXT: kmovq %rax, %k0 -; CHECK-NEXT: kmovq %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: kandq %k0, %k7, %k7 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k6 -; CHECK-NEXT: kshiftlq $62, %k6, %k6 -; CHECK-NEXT: korq %k6, %k7, %k6 -; CHECK-NEXT: kshiftlq $1, %k6, %k6 -; CHECK-NEXT: kshiftrq $1, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k1 -; CHECK-NEXT: kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k6 -; CHECK-NEXT: kshiftlq $63, %k6, %k6 -; CHECK-NEXT: kshiftrq $62, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $63, %k7, %k7 -; CHECK-NEXT: korq %k6, %k7, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $61, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; CHECK-NEXT: kandq %k1, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $60, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k2, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $59, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 8-byte Reload -; CHECK-NEXT: kandq %k2, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $58, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k3, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $57, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 8-byte Reload -; CHECK-NEXT: kandq %k3, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $56, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k4, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $55, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload -; CHECK-NEXT: kandq %k4, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $54, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k5, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $53, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload -; CHECK-NEXT: kandq %k5, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $52, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $51, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $50, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $49, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $48, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $47, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $46, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $45, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $44, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $43, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $42, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $41, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $40, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $39, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $38, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $37, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $36, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $35, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $34, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $33, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $32, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $31, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $30, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $29, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $28, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $27, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $26, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $25, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $24, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $23, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $22, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $21, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $20, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $19, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $18, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq (%rsp), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $17, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $16, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $15, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $14, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $13, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $12, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $11, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $10, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $9, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $8, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $7, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $6, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $5, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $4, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $3, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $2, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK-NEXT: kandq %k7, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $62, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kshiftlq $1, %k6, %k6 -; CHECK-NEXT: kshiftrq $1, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k6 -; CHECK-NEXT: kshiftlq $63, %k6, %k6 -; CHECK-NEXT: kshiftrq $62, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $63, %k7, %k7 -; CHECK-NEXT: korq %k6, %k7, %k6 -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $61, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k1, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $60, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $59, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k2, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $58, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $57, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k3, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $56, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $55, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k4, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $54, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $53, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kandq %k5, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $52, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $51, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $50, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $49, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $48, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $47, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $46, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $45, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $44, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $43, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $42, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $41, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $40, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $39, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $38, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $37, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $36, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $35, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $34, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $33, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $32, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $31, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $30, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $29, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $28, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $27, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $26, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $25, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $24, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $23, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $22, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $21, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $20, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $19, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $18, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq (%rsp), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $17, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $16, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $15, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $14, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $13, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $12, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $11, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $10, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $9, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $8, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k6 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k7 -; CHECK-NEXT: kshiftlq $63, %k7, %k7 -; CHECK-NEXT: kshiftrq $7, %k7, %k7 -; CHECK-NEXT: korq %k7, %k6, %k6 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k6, %k5 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k6 -; CHECK-NEXT: kshiftlq $63, %k6, %k6 -; CHECK-NEXT: kshiftrq $6, %k6, %k6 -; CHECK-NEXT: korq %k6, %k5, %k5 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k5, %k4 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k5 -; CHECK-NEXT: kshiftlq $63, %k5, %k5 -; CHECK-NEXT: kshiftrq $5, %k5, %k5 -; CHECK-NEXT: korq %k5, %k4, %k4 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k4, %k3 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k4 -; CHECK-NEXT: kshiftlq $63, %k4, %k4 -; CHECK-NEXT: kshiftrq $4, %k4, %k4 -; CHECK-NEXT: korq %k4, %k3, %k3 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k3, %k2 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k3 -; CHECK-NEXT: kshiftlq $63, %k3, %k3 -; CHECK-NEXT: kshiftrq $3, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k2, %k1 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k2 -; CHECK-NEXT: kshiftlq $63, %k2, %k2 -; CHECK-NEXT: kshiftrq $2, %k2, %k2 -; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK-NEXT: kandq %k0, %k1, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $62, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k0 -; CHECK-NEXT: kshiftlq $1, %k0, %k0 -; CHECK-NEXT: kshiftrq $1, %k0, %k0 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlq $63, %k1, %k1 -; CHECK-NEXT: korq %k1, %k0, %k1 +; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vmovd %esi, %xmm1 +; CHECK-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $3, %r8d, %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1 +; CHECK-NEXT: vpbroadcastb {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k1 +; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; CHECK-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k2 +; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; CHECK-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vpinsrb $15, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k3 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovdqu8 %zmm0, 64(%rdi) {%k1} -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload -; CHECK-NEXT: vmovdqu8 %zmm0, 128(%rdi) {%k1} -; CHECK-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload +; CHECK-NEXT: vmovdqu8 %zmm0, 64(%rdi) {%k3} +; CHECK-NEXT: vmovdqu8 %zmm0, 128(%rdi) {%k2} ; CHECK-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: addq $376, %rsp ## imm = 0x178 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq tail call void @llvm.masked.store.v192i8.p0(<192 x i8> zeroinitializer, ptr %p0, <192 x i1> %mask) diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll index 1839eefcd264e..3879242a29c05 100644 --- a/llvm/test/CodeGen/X86/vector-compress.ll +++ b/llvm/test/CodeGen/X86/vector-compress.ll @@ -2349,465 +2349,104 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8> ; AVX512F-NEXT: pushq %rbp ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-64, %rsp -; AVX512F-NEXT: subq $256, %rsp # imm = 0x100 -; AVX512F-NEXT: movzbl 352(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: kmovw %eax, %k0 -; AVX512F-NEXT: movzbl 360(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-5, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k6 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 368(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-9, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 376(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $12, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-17, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 384(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $11, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-33, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k2 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 392(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $10, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-65, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 400(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $9, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-129, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k3 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 408(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $8, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-257, %ax # imm = 0xFEFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 416(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $7, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-513, %ax # imm = 0xFDFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k4 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 424(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $6, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-1025, %ax # imm = 0xFBFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 432(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $5, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-2049, %ax # imm = 0xF7FF -; AVX512F-NEXT: kmovw %eax, %k5 -; AVX512F-NEXT: kandw %k5, %k0, %k0 -; AVX512F-NEXT: movzbl 440(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $4, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-4097, %ax # imm = 0xEFFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 448(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $3, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-8193, %ax # imm = 0xDFFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 456(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $2, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k1 -; AVX512F-NEXT: movw $-16385, %ax # imm = 0xBFFF -; AVX512F-NEXT: kmovw %eax, %k0 -; AVX512F-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 464(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 472(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 224(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: movzbl 232(%rbp), %r10d -; AVX512F-NEXT: kmovw %r10d, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: korw %k1, %k7, %k1 -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 240(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 248(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 256(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 264(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 272(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 280(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 288(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 296(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 304(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $5, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 312(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $4, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 320(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $3, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 328(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $2, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 336(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 344(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 96(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: movzbl 104(%rbp), %r10d -; AVX512F-NEXT: kmovw %r10d, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: korw %k1, %k7, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 112(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 120(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 128(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 136(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 144(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 152(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 160(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 168(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 176(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $5, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 184(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $4, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 192(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $3, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 200(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $2, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 208(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 216(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: andl $1, %edi -; AVX512F-NEXT: kmovw %esi, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $14, %k7, %k7 -; AVX512F-NEXT: kmovw %edi, %k6 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %edx, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %ecx, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %r8d, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %r9d, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 16(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 24(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 32(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kandw %k3, %k6, %k6 -; AVX512F-NEXT: movzbl 40(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kandw %k4, %k6, %k5 -; AVX512F-NEXT: movzbl 48(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k6 -; AVX512F-NEXT: kshiftlw $15, %k6, %k6 -; AVX512F-NEXT: kshiftrw $5, %k6, %k6 -; AVX512F-NEXT: korw %k6, %k5, %k5 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k5, %k4 -; AVX512F-NEXT: movzbl 56(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k5 -; AVX512F-NEXT: kshiftlw $15, %k5, %k5 -; AVX512F-NEXT: kshiftrw $4, %k5, %k5 -; AVX512F-NEXT: korw %k5, %k4, %k4 -; AVX512F-NEXT: kandw %k0, %k4, %k3 -; AVX512F-NEXT: movzbl 64(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k4 -; AVX512F-NEXT: kshiftlw $15, %k4, %k4 -; AVX512F-NEXT: kshiftrw $3, %k4, %k4 -; AVX512F-NEXT: korw %k4, %k3, %k3 -; AVX512F-NEXT: kandw %k2, %k3, %k2 -; AVX512F-NEXT: movzbl 72(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k3 -; AVX512F-NEXT: kshiftlw $15, %k3, %k3 -; AVX512F-NEXT: kshiftrw $2, %k3, %k3 -; AVX512F-NEXT: korw %k3, %k2, %k2 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k2, %k0 -; AVX512F-NEXT: movzbl 80(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: kshiftlw $14, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k0, %k0 -; AVX512F-NEXT: kshiftlw $1, %k0, %k0 -; AVX512F-NEXT: kshiftrw $1, %k0, %k0 -; AVX512F-NEXT: movzbl 88(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: kshiftlw $15, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k0, %k2 +; AVX512F-NEXT: subq $192, %rsp +; AVX512F-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 360(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $2, 368(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $3, 376(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $4, 384(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $5, 392(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $6, 400(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $7, 408(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $8, 416(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $9, 424(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $10, 432(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $11, 440(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $12, 448(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $13, 456(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $14, 464(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $15, 472(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512F-NEXT: vptestmd %zmm3, %zmm2, %k1 +; AVX512F-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 232(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $2, 240(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $3, 248(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $4, 256(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $5, 264(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $6, 272(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $7, 280(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $8, 288(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $9, 296(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $10, 304(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $11, 312(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $12, 320(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $13, 328(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $14, 336(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $15, 344(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm3, %zmm2, %k3 +; AVX512F-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 104(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $2, 112(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $3, 120(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $4, 128(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $5, 136(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $6, 144(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $7, 152(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $8, 160(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $9, 168(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $10, 176(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $11, 184(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $12, 192(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $13, 200(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $14, 208(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $15, 216(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm3, %zmm2, %k2 +; AVX512F-NEXT: vmovd %edi, %xmm2 +; AVX512F-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm3, %zmm2, %k4 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpcompressd %zmm2, %zmm4 {%k2} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero,xmm6[8],zero,zero,zero,xmm6[9],zero,zero,zero,xmm6[10],zero,zero,zero,xmm6[11],zero,zero,zero,xmm6[12],zero,zero,zero,xmm6[13],zero,zero,zero,xmm6[14],zero,zero,zero,xmm6[15],zero,zero,zero -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: vpcompressd %zmm3, %zmm5 {%k2} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k2} {z} = -1 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpcompressd %zmm0, %zmm7 {%k1} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1 -; AVX512F-NEXT: vextracti128 $1, %ymm6, %xmm6 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero,xmm6[8],zero,zero,zero,xmm6[9],zero,zero,zero,xmm6[10],zero,zero,zero,xmm6[11],zero,zero,zero,xmm6[12],zero,zero,zero,xmm6[13],zero,zero,zero,xmm6[14],zero,zero,zero,xmm6[15],zero,zero,zero -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512F-NEXT: vpcompressd %zmm6, %zmm6 {%k1} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm8 {%k1} {z} = -1 -; AVX512F-NEXT: vpmovdb %zmm4, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpsrld $31, %zmm2, %zmm4 -; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm9 -; AVX512F-NEXT: vpaddd %ymm4, %ymm9, %ymm4 -; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm9 -; AVX512F-NEXT: vpaddd %xmm4, %xmm9, %xmm4 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm9 = xmm4[2,3,2,3] -; AVX512F-NEXT: vpaddd %xmm4, %xmm9, %xmm4 -; AVX512F-NEXT: vpextrd $1, %xmm4, %eax -; AVX512F-NEXT: vmovd %xmm4, %ecx +; AVX512F-NEXT: vpcompressd %zmm2, %zmm2 {%k4} {z} +; AVX512F-NEXT: vpmovdb %zmm2, (%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k4} {z} = -1 +; AVX512F-NEXT: vpsrld $31, %zmm2, %zmm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-NEXT: vpaddd %ymm4, %ymm3, %ymm3 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpaddd %xmm4, %xmm3, %xmm3 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] +; AVX512F-NEXT: vpaddd %xmm4, %xmm3, %xmm3 +; AVX512F-NEXT: vpextrd $1, %xmm3, %eax +; AVX512F-NEXT: vmovd %xmm3, %ecx ; AVX512F-NEXT: addl %eax, %ecx ; AVX512F-NEXT: andl $31, %ecx -; AVX512F-NEXT: vpmovdb %zmm7, 64(%rsp,%rcx) -; AVX512F-NEXT: vpmovdb %zmm5, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512F-NEXT: vpcompressd %zmm3, %zmm3 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm3, (%rsp,%rcx) +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpcompressd %zmm3, %zmm3 {%k3} {z} +; AVX512F-NEXT: vpmovdb %zmm3, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k3} {z} = -1 ; AVX512F-NEXT: vpsrld $31, %zmm3, %zmm4 ; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm5 ; AVX512F-NEXT: vpaddd %ymm5, %ymm4, %ymm4 @@ -2819,9 +2458,13 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8> ; AVX512F-NEXT: vmovd %xmm4, %ecx ; AVX512F-NEXT: addl %eax, %ecx ; AVX512F-NEXT: andl $31, %ecx -; AVX512F-NEXT: vpmovdb %zmm6, 96(%rsp,%rcx) -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm4 -; AVX512F-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, 32(%rsp,%rcx) +; AVX512F-NEXT: vmovdqa (%rsp), %ymm0 +; AVX512F-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1 ; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm4 ; AVX512F-NEXT: vpsubd %zmm2, %zmm4, %zmm4 ; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm5 @@ -2834,10 +2477,11 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8> ; AVX512F-NEXT: vmovd %xmm4, %ecx ; AVX512F-NEXT: addl %eax, %ecx ; AVX512F-NEXT: andl $63, %ecx -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm4 -; AVX512F-NEXT: vmovaps %ymm4, 128(%rsp,%rcx) +; AVX512F-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm4 +; AVX512F-NEXT: vmovdqa %ymm4, 64(%rsp,%rcx) ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vpmovdb %zmm8, %xmm4 +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm4 {%k1} {z} = -1 +; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 ; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; AVX512F-NEXT: vpblendvb %ymm3, {{[0-9]+}}(%rsp), %ymm4, %ymm3 @@ -3632,458 +3276,100 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i ; AVX512F-NEXT: pushq %rbp ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-64, %rsp -; AVX512F-NEXT: subq $640, %rsp # imm = 0x280 -; AVX512F-NEXT: movzbl 352(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: kmovw %eax, %k0 -; AVX512F-NEXT: movzbl 360(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-5, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k6 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 368(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-9, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 376(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $12, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-17, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 384(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $11, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-33, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k2 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 392(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $10, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-65, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 400(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $9, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-129, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k3 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 408(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $8, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-257, %ax # imm = 0xFEFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 416(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $7, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-513, %ax # imm = 0xFDFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %k1, %k4 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 424(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $6, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-1025, %ax # imm = 0xFBFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 432(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $5, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-2049, %ax # imm = 0xF7FF -; AVX512F-NEXT: kmovw %eax, %k5 -; AVX512F-NEXT: kandw %k5, %k0, %k0 -; AVX512F-NEXT: movzbl 440(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $4, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-4097, %ax # imm = 0xEFFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 448(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $3, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-8193, %ax # imm = 0xDFFF -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: movzbl 456(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $2, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k1 -; AVX512F-NEXT: movw $-16385, %ax # imm = 0xBFFF -; AVX512F-NEXT: kmovw %eax, %k0 -; AVX512F-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 464(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 472(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 224(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: movzbl 232(%rbp), %r10d -; AVX512F-NEXT: kmovw %r10d, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: korw %k1, %k7, %k1 -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 240(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 248(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 256(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 264(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 272(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 280(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 288(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 296(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 304(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $5, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 312(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $4, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 320(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $3, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 328(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $2, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 336(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 344(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512F-NEXT: movzbl 96(%rbp), %eax -; AVX512F-NEXT: andl $1, %eax -; AVX512F-NEXT: movzbl 104(%rbp), %r10d -; AVX512F-NEXT: kmovw %r10d, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $14, %k1, %k1 -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: korw %k1, %k7, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512F-NEXT: kandw %k7, %k1, %k1 -; AVX512F-NEXT: movzbl 112(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k6, %k1, %k1 -; AVX512F-NEXT: movzbl 120(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 128(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 136(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 144(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 152(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 160(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k1, %k1 -; AVX512F-NEXT: movzbl 168(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k4, %k1, %k1 -; AVX512F-NEXT: movzbl 176(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $5, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 184(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $4, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movzbl 192(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $3, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: kandw %k2, %k1, %k1 -; AVX512F-NEXT: movzbl 200(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $2, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k1, %k1 -; AVX512F-NEXT: movzbl 208(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $14, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: kshiftlw $1, %k1, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k1 -; AVX512F-NEXT: movzbl 216(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k1, %k1 -; AVX512F-NEXT: andl $1, %edi -; AVX512F-NEXT: kmovw %esi, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $14, %k7, %k7 -; AVX512F-NEXT: kmovw %edi, %k6 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %edx, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $13, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %ecx, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $12, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %r8d, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $11, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: kmovw %r9d, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $10, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 16(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $9, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 24(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $8, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512F-NEXT: kandw %k5, %k6, %k6 -; AVX512F-NEXT: movzbl 32(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $7, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kandw %k3, %k6, %k6 -; AVX512F-NEXT: movzbl 40(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k7 -; AVX512F-NEXT: kshiftlw $15, %k7, %k7 -; AVX512F-NEXT: kshiftrw $6, %k7, %k7 -; AVX512F-NEXT: korw %k7, %k6, %k6 -; AVX512F-NEXT: kandw %k4, %k6, %k5 -; AVX512F-NEXT: movzbl 48(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k6 -; AVX512F-NEXT: kshiftlw $15, %k6, %k6 -; AVX512F-NEXT: kshiftrw $5, %k6, %k6 -; AVX512F-NEXT: korw %k6, %k5, %k5 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512F-NEXT: kandw %k3, %k5, %k4 -; AVX512F-NEXT: movzbl 56(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k5 -; AVX512F-NEXT: kshiftlw $15, %k5, %k5 -; AVX512F-NEXT: kshiftrw $4, %k5, %k5 -; AVX512F-NEXT: korw %k5, %k4, %k4 -; AVX512F-NEXT: kandw %k0, %k4, %k3 -; AVX512F-NEXT: movzbl 64(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k4 -; AVX512F-NEXT: kshiftlw $15, %k4, %k4 -; AVX512F-NEXT: kshiftrw $3, %k4, %k4 -; AVX512F-NEXT: korw %k4, %k3, %k3 -; AVX512F-NEXT: kandw %k2, %k3, %k2 -; AVX512F-NEXT: movzbl 72(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k3 -; AVX512F-NEXT: kshiftlw $15, %k3, %k3 -; AVX512F-NEXT: kshiftrw $2, %k3, %k3 -; AVX512F-NEXT: korw %k3, %k2, %k2 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512F-NEXT: kandw %k0, %k2, %k0 -; AVX512F-NEXT: movzbl 80(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: kshiftlw $14, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k0, %k0 -; AVX512F-NEXT: kshiftlw $1, %k0, %k0 -; AVX512F-NEXT: kshiftrw $1, %k0, %k0 -; AVX512F-NEXT: movzbl 88(%rbp), %eax -; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: kshiftlw $15, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k0, %k2 -; AVX512F-NEXT: vpcompressd %zmm0, %zmm4 {%k2} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: vpcompressd %zmm2, %zmm2 {%k2} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm5 {%k2} {z} = -1 -; AVX512F-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512F-NEXT: vpcompressd %zmm3, %zmm3 {%k2} {z} -; AVX512F-NEXT: vpcompressd %zmm1, %zmm1 {%k1} {z} -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm6 {%k1} {z} = -1 -; AVX512F-NEXT: vmovdqa64 %zmm4, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: subq $576, %rsp # imm = 0x240 +; AVX512F-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 360(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $2, 368(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $3, 376(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $4, 384(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $5, 392(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $6, 400(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $7, 408(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $8, 416(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $9, 424(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $10, 432(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $11, 440(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $12, 448(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $13, 456(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $14, 464(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $15, 472(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512F-NEXT: vptestmd %zmm5, %zmm4, %k1 +; AVX512F-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 232(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $2, 240(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $3, 248(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $4, 256(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $5, 264(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $6, 272(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $7, 280(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $8, 288(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $9, 296(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $10, 304(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $11, 312(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $12, 320(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $13, 328(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $14, 336(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $15, 344(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm5, %zmm4, %k3 +; AVX512F-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrb $1, 104(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $2, 112(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $3, 120(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $4, 128(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $5, 136(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $6, 144(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $7, 152(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $8, 160(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $9, 168(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $10, 176(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $11, 184(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $12, 192(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $13, 200(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $14, 208(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $15, 216(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm5, %zmm4, %k2 +; AVX512F-NEXT: vmovd %edi, %xmm4 +; AVX512F-NEXT: vpinsrb $1, %esi, %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $2, %edx, %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $3, %ecx, %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $4, %r8d, %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $5, %r9d, %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $6, 16(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $7, 24(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $8, 32(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $9, 40(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $10, 48(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $11, 56(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $12, 64(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $13, 72(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $14, 80(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpinsrb $15, 88(%rbp), %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vptestmd %zmm5, %zmm4, %k4 +; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k4} {z} +; AVX512F-NEXT: vmovdqa64 %zmm0, (%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k4} {z} = -1 ; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm4 -; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm7 -; AVX512F-NEXT: vpaddd %ymm7, %ymm4, %ymm4 -; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm7 -; AVX512F-NEXT: vpaddd %xmm7, %xmm4, %xmm4 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm7 = xmm4[2,3,2,3] -; AVX512F-NEXT: vpaddd %xmm7, %xmm4, %xmm4 +; AVX512F-NEXT: vextracti64x4 $1, %zmm4, %ymm5 +; AVX512F-NEXT: vpaddd %ymm5, %ymm4, %ymm4 +; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm5 +; AVX512F-NEXT: vpaddd %xmm5, %xmm4, %xmm4 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,2,3] +; AVX512F-NEXT: vpaddd %xmm5, %xmm4, %xmm4 ; AVX512F-NEXT: vpextrd $1, %xmm4, %eax ; AVX512F-NEXT: vmovd %xmm4, %ecx ; AVX512F-NEXT: addl %eax, %ecx ; AVX512F-NEXT: andl $31, %ecx -; AVX512F-NEXT: vmovdqa64 %zmm1, 64(%rsp,%rcx,4) -; AVX512F-NEXT: vmovdqa64 %zmm2, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpsrld $31, %zmm5, %zmm1 +; AVX512F-NEXT: vpcompressd %zmm1, %zmm1 {%k2} {z} +; AVX512F-NEXT: vmovdqa64 %zmm1, (%rsp,%rcx,4) +; AVX512F-NEXT: vpcompressd %zmm2, %zmm1 {%k3} {z} +; AVX512F-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k3} {z} = -1 +; AVX512F-NEXT: vpsrld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 ; AVX512F-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 @@ -4094,11 +3380,13 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i ; AVX512F-NEXT: vmovd %xmm1, %ecx ; AVX512F-NEXT: addl %eax, %ecx ; AVX512F-NEXT: andl $31, %ecx -; AVX512F-NEXT: vmovdqa64 %zmm3, 192(%rsp,%rcx,4) -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm1 +; AVX512F-NEXT: vpcompressd %zmm3, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 %zmm1, 128(%rsp,%rcx,4) +; AVX512F-NEXT: vmovdqa64 (%rsp), %zmm1 ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm2 -; AVX512F-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpsrld $31, %zmm6, %zmm1 +; AVX512F-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k2} {z} = -1 +; AVX512F-NEXT: vpsrld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vpsubd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0 @@ -4112,9 +3400,9 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i ; AVX512F-NEXT: andl $63, %ecx ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm1 -; AVX512F-NEXT: vmovaps %zmm0, 320(%rsp,%rcx,4) +; AVX512F-NEXT: vmovaps %zmm0, 256(%rsp,%rcx,4) ; AVX512F-NEXT: vmovaps %zmm2, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vmovaps %zmm1, 384(%rsp,%rcx,4) +; AVX512F-NEXT: vmovaps %zmm1, 320(%rsp,%rcx,4) ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm1 ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm2