diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9fb97918cb71a..ced4e96721268 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7198,6 +7198,29 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
                                             SmallVectorImpl<Use *> &Ops) const {
   using namespace llvm::PatternMatch;
 
+  if (I->getOpcode() == Instruction::And &&
+      (ST->hasBMI() || (I->getType()->isVectorTy() && ST->hasSSE2()))) {
+    for (auto &Op : I->operands()) {
+      // (and X, (not Y)) -> (andn X, Y)
+      // BMI ANDN only has 32/64-bit register forms, so i8 is excluded.
+      if (match(Op.get(), m_Not(m_Value())) && !I->getType()->isIntegerTy(8)) {
+        Ops.push_back(&Op);
+        return true;
+      }
+      // (and X, (splat (not Y))) -> (andn X, (splat Y))
+      if (match(Op.get(),
+                m_Shuffle(m_InsertElt(m_Value(), m_Not(m_Value()), m_ZeroInt()),
+                          m_Value(), m_ZeroMask()))) {
+        Use &InsertElt = cast<Instruction>(Op)->getOperandUse(0);
+        Use &Not = cast<Instruction>(InsertElt)->getOperandUse(1);
+        Ops.push_back(&Not);
+        Ops.push_back(&InsertElt);
+        Ops.push_back(&Op);
+        return true;
+      }
+    }
+  }
+
   FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
   if (!VTy)
     return false;
diff --git a/llvm/test/CodeGen/X86/andnot-sink-not.ll b/llvm/test/CodeGen/X86/andnot-sink-not.ll
new file mode 100644
index 0000000000000..fefbdc84699f4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andnot-sink-not.ll
@@ -0,0 +1,3114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi | FileCheck %s --check-prefixes=X86-NOBMI,X86
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -mattr=+bmi | FileCheck %s --check-prefixes=X86-BMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOAVX2,X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI,X64-NOAVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi,+avx2 | FileCheck %s --check-prefixes=X64,X64-BMI,X64-AVX2
+
+define i8 @and_sink_not_i8(i8 %x, i8 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i8:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT: je .LBB0_2
+; X86-NOBMI-NEXT: # %bb.1: # %mask
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: notb %cl
+; X86-NOBMI-NEXT: andb %al, %cl
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: .LBB0_2: # %identity
+; X86-NOBMI-NEXT: retl
+;
+; X86-BMI-LABEL: and_sink_not_i8:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT: je .LBB0_2
+; X86-BMI-NEXT: # %bb.1: # %mask
+; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT: notb %cl
+; X86-BMI-NEXT: andb %al, %cl
+; X86-BMI-NEXT: movl %ecx, %eax
+; X86-BMI-NEXT: .LBB0_2: # %identity
+; X86-BMI-NEXT: retl
+;
+; X64-LABEL: and_sink_not_i8:
+; X64: # %bb.0:
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: je .LBB0_2
+; X64-NEXT: # %bb.1: # %mask
+; X64-NEXT: notb %sil
+; X64-NEXT: andb %dil, %sil
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_2: # %identity
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: retq
+  %a = xor i8 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i8 %a, %x
+  ret i8 %masked
+
+identity:
+  ret i8 %x
+}
+
+define i8 @and_sink_not_i8_swapped(i8
%x, i8 %m, i1 zeroext %cond) { +; X86-NOBMI-LABEL: and_sink_not_i8_swapped: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB1_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: notb %cl +; X86-NOBMI-NEXT: andb %cl, %al +; X86-NOBMI-NEXT: .LBB1_2: # %identity +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i8_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB1_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: notb %cl +; X86-BMI-NEXT: andb %cl, %al +; X86-BMI-NEXT: .LBB1_2: # %identity +; X86-BMI-NEXT: retl +; +; X64-LABEL: and_sink_not_i8_swapped: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: testl %edx, %edx +; X64-NEXT: je .LBB1_2 +; X64-NEXT: # %bb.1: # %mask +; X64-NEXT: notb %sil +; X64-NEXT: andb %sil, %al +; X64-NEXT: .LBB1_2: # %identity +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %a = xor i8 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i8 %x, %a + ret i8 %masked + +identity: + ret i8 %x +} + +define i16 @and_sink_not_i16(i16 %x, i16 %m, i1 zeroext %cond) { +; X86-NOBMI-LABEL: and_sink_not_i16: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB2_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %eax, %ecx +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: retl +; X86-NOBMI-NEXT: .LBB2_2: # %identity +; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i16: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB2_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl %eax, %ecx, %eax +; X86-BMI-NEXT: .LBB2_2: # %identity +; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB2_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: andl %edi, %esi +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: retq +; X64-NOBMI-NEXT: .LBB2_2: # %identity +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB2_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB2_2: # %identity +; X64-BMI-NEXT: movl %edi, %eax +; X64-BMI-NEXT: retq + %a = xor i16 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i16 %a, %x + ret i16 %masked + +identity: + ret i16 %x +} + +define i16 @and_sink_not_i16_swapped(i16 %x, i16 %m, i1 zeroext %cond) { +; X86-NOBMI-LABEL: and_sink_not_i16_swapped: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB3_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %ecx, %eax +; X86-NOBMI-NEXT: .LBB3_2: # %identity +; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i16_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB3_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl %eax, %ecx, %eax +; X86-BMI-NEXT: .LBB3_2: # %identity +; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i16_swapped: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB3_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: andl %esi, %eax +; X64-NOBMI-NEXT: .LBB3_2: # %identity +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i16_swapped: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB3_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB3_2: # %identity +; X64-BMI-NEXT: movl %edi, %eax +; X64-BMI-NEXT: retq + %a = xor i16 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i16 %x, %a + ret i16 %masked + +identity: + ret i16 %x +} + +define i32 @and_sink_not_i32(i32 %x, i32 %m, i1 zeroext %cond) { +; X86-NOBMI-LABEL: and_sink_not_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB4_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %eax, %ecx +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: .LBB4_2: # %identity +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB4_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl %eax, %ecx, %eax +; X86-BMI-NEXT: .LBB4_2: # %identity +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB4_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: andl %edi, %esi +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: retq +; X64-NOBMI-NEXT: .LBB4_2: # %identity +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB4_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB4_2: # %identity +; X64-BMI-NEXT: movl %edi, %eax +; X64-BMI-NEXT: retq + %a = xor i32 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i32 %a, %x + ret i32 %masked + +identity: + ret i32 %x +} + +define i32 @and_sink_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) { +; X86-NOBMI-LABEL: and_sink_not_i32_swapped: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB5_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %ecx, %eax +; X86-NOBMI-NEXT: .LBB5_2: # %identity +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i32_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB5_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl %eax, %ecx, %eax +; X86-BMI-NEXT: .LBB5_2: # %identity +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i32_swapped: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB5_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: andl %esi, %eax +; X64-NOBMI-NEXT: .LBB5_2: # %identity +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i32_swapped: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB5_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB5_2: # %identity +; X64-BMI-NEXT: movl %edi, %eax +; X64-BMI-NEXT: retq + %a = xor i32 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i32 %x, %a + ret i32 %masked + +identity: + ret i32 %x +} + +define i64 @and_sink_not_i64(i64 %x, i64 %m, i1 zeroext %cond) nounwind { +; X86-NOBMI-LABEL: and_sink_not_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB6_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: notl %esi +; X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %eax, %ecx +; X86-NOBMI-NEXT: andl %edx, %esi +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB6_2: # %identity +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB6_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %eax, %esi, %eax +; X86-BMI-NEXT: andnl %edx, %ecx, %edx +; X86-BMI-NEXT: .LBB6_2: # %identity +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB6_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notq %rsi +; X64-NOBMI-NEXT: andq %rdi, %rsi +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: retq +; X64-NOBMI-NEXT: .LBB6_2: # %identity +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB6_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB6_2: # %identity +; X64-BMI-NEXT: movq %rdi, %rax +; X64-BMI-NEXT: retq + %a = xor i64 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i64 %a, %x + ret i64 %masked + +identity: + ret i64 %x +} + +define i64 @and_sink_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) nounwind { +; 
X86-NOBMI-LABEL: and_sink_not_i64_swapped: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: je .LBB7_2 +; X86-NOBMI-NEXT: # %bb.1: # %mask +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: notl %esi +; X86-NOBMI-NEXT: notl %ecx +; X86-NOBMI-NEXT: andl %ecx, %eax +; X86-NOBMI-NEXT: andl %esi, %edx +; X86-NOBMI-NEXT: .LBB7_2: # %identity +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_i64_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB7_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %eax, %esi, %eax +; X86-BMI-NEXT: andnl %edx, %ecx, %edx +; X86-BMI-NEXT: .LBB7_2: # %identity +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: and_sink_not_i64_swapped: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: testl %edx, %edx +; X64-NOBMI-NEXT: je .LBB7_2 +; X64-NOBMI-NEXT: # %bb.1: # %mask +; X64-NOBMI-NEXT: notq %rsi +; X64-NOBMI-NEXT: andq %rsi, %rax +; X64-NOBMI-NEXT: .LBB7_2: # %identity +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: and_sink_not_i64_swapped: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: testl %edx, %edx +; X64-BMI-NEXT: je .LBB7_2 +; X64-BMI-NEXT: # %bb.1: # %mask +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq +; X64-BMI-NEXT: .LBB7_2: # %identity +; X64-BMI-NEXT: movq %rdi, %rax +; X64-BMI-NEXT: retq + %a = xor i64 %m, -1 + br i1 %cond, label %mask, label %identity + +mask: + %masked = and i64 %x, %a + ret i64 %masked + +identity: + ret i64 %x +} + +define <8 x i8> @and_sink_not_v8i8(<8 x i8> %x, <8 x i8> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v8i8: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB8_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: notb %dh +; X86-NEXT: andb %ch, %dh +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %dl, %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: notb %dl +; X86-NEXT: andb %cl, %dl +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: notb %cl +; X86-NEXT: andb %bh, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-NEXT: notb %bh +; X86-NEXT: andb %bl, %bh +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: notb %bl +; X86-NEXT: andb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: notb %al +; X86-NEXT: andb %ah, %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: notb %ah +; X86-NEXT: andb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movb %ah, (%esi) +; X86-NEXT: movb %al, 1(%esi) +; X86-NEXT: movb %bl, 2(%esi) +; X86-NEXT: movb %bh, 3(%esi) +; X86-NEXT: movb %cl, 4(%esi) +; X86-NEXT: movb %dl, 5(%esi) +; X86-NEXT: 
movb %ch, 6(%esi) +; X86-NEXT: movb %dh, 7(%esi) +; X86-NEXT: jmp .LBB8_3 +; X86-NEXT: .LBB8_2: # %identity +; X86-NEXT: movb %al, (%esi) +; X86-NEXT: movb %ah, 1(%esi) +; X86-NEXT: movb %dh, 2(%esi) +; X86-NEXT: movb %bl, 3(%esi) +; X86-NEXT: movb %bh, 4(%esi) +; X86-NEXT: movb %cl, 5(%esi) +; X86-NEXT: movb %dl, 6(%esi) +; X86-NEXT: movb %ch, 7(%esi) +; X86-NEXT: .LBB8_3: # %identity +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v8i8: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB8_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: notb %dh +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %dl, %ch +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-SSE-NEXT: notb %dl +; X86-SSE-NEXT: andb %cl, %dl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE-NEXT: notb %cl +; X86-SSE-NEXT: andb %bh, %cl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-SSE-NEXT: notb %bh +; X86-SSE-NEXT: andb %bl, %bh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-SSE-NEXT: notb %bl +; X86-SSE-NEXT: andb {{[0-9]+}}(%esp), %bl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-SSE-NEXT: notb %al +; X86-SSE-NEXT: andb %ah, %al +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-SSE-NEXT: notb %ah +; X86-SSE-NEXT: andb {{[0-9]+}}(%esp), %ah +; X86-SSE-NEXT: movb %ah, (%esi) +; X86-SSE-NEXT: movb %al, 1(%esi) +; X86-SSE-NEXT: movb %bl, 2(%esi) +; X86-SSE-NEXT: movb %bh, 3(%esi) +; X86-SSE-NEXT: movb %cl, 4(%esi) +; X86-SSE-NEXT: movb %dl, 5(%esi) +; X86-SSE-NEXT: movb %ch, 6(%esi) +; X86-SSE-NEXT: movb %dh, 7(%esi) +; X86-SSE-NEXT: jmp .LBB8_3 +; X86-SSE-NEXT: .LBB8_2: # %identity +; X86-SSE-NEXT: movb %al, (%esi) +; X86-SSE-NEXT: movb %ah, 1(%esi) +; X86-SSE-NEXT: movb %dh, 2(%esi) +; X86-SSE-NEXT: movb %bl, 3(%esi) +; X86-SSE-NEXT: movb %bh, 4(%esi) +; X86-SSE-NEXT: movb %cl, 5(%esi) +; X86-SSE-NEXT: movb %dl, 6(%esi) +; X86-SSE-NEXT: movb %ch, 7(%esi) +; X86-SSE-NEXT: .LBB8_3: # %identity +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v8i8: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB8_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: movaps %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB8_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v8i8: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %al +; 
X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB8_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: notb %cl +; X86-BMI-NEXT: andb %dh, %cl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: notb %dh +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %dl, %ch +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI-NEXT: notb %dl +; X86-BMI-NEXT: andb %bh, %dl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-BMI-NEXT: notb %bh +; X86-BMI-NEXT: andb %bl, %bh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI-NEXT: notb %bl +; X86-BMI-NEXT: andb {{[0-9]+}}(%esp), %bl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI-NEXT: notb %al +; X86-BMI-NEXT: andb %ah, %al +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-BMI-NEXT: notb %ah +; X86-BMI-NEXT: andb {{[0-9]+}}(%esp), %ah +; X86-BMI-NEXT: movb %ah, (%esi) +; X86-BMI-NEXT: movb %al, 1(%esi) +; X86-BMI-NEXT: movb %bl, 2(%esi) +; X86-BMI-NEXT: movb %bh, 3(%esi) +; X86-BMI-NEXT: movb %dl, 4(%esi) +; X86-BMI-NEXT: movb %ch, 5(%esi) +; X86-BMI-NEXT: movb %dh, 6(%esi) +; X86-BMI-NEXT: movb %cl, 7(%esi) +; X86-BMI-NEXT: jmp .LBB8_3 +; X86-BMI-NEXT: .LBB8_2: # %identity +; X86-BMI-NEXT: movb %al, (%esi) +; X86-BMI-NEXT: movb %ah, 1(%esi) +; X86-BMI-NEXT: movb %cl, 2(%esi) +; X86-BMI-NEXT: movb %bl, 3(%esi) +; X86-BMI-NEXT: movb %bh, 4(%esi) +; X86-BMI-NEXT: movb %dl, 5(%esi) +; X86-BMI-NEXT: movb %ch, 6(%esi) +; X86-BMI-NEXT: movb %dh, 7(%esi) +; X86-BMI-NEXT: .LBB8_3: # %identity +; X86-BMI-NEXT: movl %esi, %eax +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v8i8: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB8_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movaps %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB8_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v8i8: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB8_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vandnps %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB8_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <8 x i8> %m, splat (i8 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <8 x i8> %a, %x + ret <8 x i8> %masked + +identity: + ret <8 x i8> %x +} + +define <8 x i8> @and_sink_not_v8i8_swapped(<8 x i8> %x, <8 x i8> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v8i8_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB9_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movb %ch, %dh +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %dh +; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: andb %ch, %ah +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; 
X86-NEXT: andb %ch, %bl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %bh +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %dl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: andb %ch, %dh +; X86-NEXT: movb %dh, (%esi) +; X86-NEXT: movb %dl, 1(%esi) +; X86-NEXT: movb %cl, 2(%esi) +; X86-NEXT: movb %bh, 3(%esi) +; X86-NEXT: movb %bl, 4(%esi) +; X86-NEXT: movb %al, 5(%esi) +; X86-NEXT: movb %ah, 6(%esi) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 7(%esi) +; X86-NEXT: jmp .LBB9_3 +; X86-NEXT: .LBB9_2: # %identity +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movb %dh, (%esi) +; X86-NEXT: movb %dl, 1(%esi) +; X86-NEXT: movb %cl, 2(%esi) +; X86-NEXT: movb %bh, 3(%esi) +; X86-NEXT: movb %bl, 4(%esi) +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb %cl, 5(%esi) +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb %cl, 6(%esi) +; X86-NEXT: movb %ch, 7(%esi) +; X86-NEXT: .LBB9_3: # %identity +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $4, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v8i8_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB9_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movb %ch, %dh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-SSE-NEXT: andb %ch, %ah +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %al +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %bl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %bh +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %cl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %dl +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: movb %dh, (%esi) +; X86-SSE-NEXT: movb %dl, 1(%esi) +; X86-SSE-NEXT: movb %cl, 2(%esi) +; X86-SSE-NEXT: movb %bh, 3(%esi) +; X86-SSE-NEXT: movb %bl, 4(%esi) +; X86-SSE-NEXT: movb %al, 5(%esi) +; X86-SSE-NEXT: movb %ah, 6(%esi) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 7(%esi) +; X86-SSE-NEXT: jmp .LBB9_3 +; X86-SSE-NEXT: .LBB9_2: # %identity +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: movb %dh, (%esi) +; X86-SSE-NEXT: movb %dl, 1(%esi) +; X86-SSE-NEXT: movb %cl, 2(%esi) +; X86-SSE-NEXT: movb %bh, 3(%esi) +; X86-SSE-NEXT: movb %bl, 4(%esi) +; X86-SSE-NEXT: 
movb {{[0-9]+}}(%esp), %cl +; X86-SSE-NEXT: movb %cl, 5(%esi) +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE-NEXT: movb %cl, 6(%esi) +; X86-SSE-NEXT: movb %ch, 7(%esi) +; X86-SSE-NEXT: .LBB9_3: # %identity +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: addl $4, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v8i8_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB9_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: movaps %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB9_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v8i8_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: pushl %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB9_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movb %ch, %dh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-BMI-NEXT: andb %ch, %ah +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %al +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %bl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %bh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %cl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %dl +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: movb %dh, (%esi) +; X86-BMI-NEXT: movb %dl, 1(%esi) +; X86-BMI-NEXT: movb %cl, 2(%esi) +; X86-BMI-NEXT: movb %bh, 3(%esi) +; X86-BMI-NEXT: movb %bl, 4(%esi) +; X86-BMI-NEXT: movb %al, 5(%esi) +; X86-BMI-NEXT: movb %ah, 6(%esi) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 7(%esi) +; X86-BMI-NEXT: jmp .LBB9_3 +; X86-BMI-NEXT: .LBB9_2: # %identity +; X86-BMI-NEXT: movb %dh, (%esi) +; X86-BMI-NEXT: movb %dl, 1(%esi) +; X86-BMI-NEXT: movb %cl, 2(%esi) +; X86-BMI-NEXT: movb %bh, 3(%esi) +; X86-BMI-NEXT: movb %bl, 4(%esi) +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: movb %cl, 5(%esi) +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: movb %cl, 6(%esi) +; X86-BMI-NEXT: movb %ch, 7(%esi) +; X86-BMI-NEXT: .LBB9_3: # %identity +; X86-BMI-NEXT: movl %esi, %eax +; X86-BMI-NEXT: addl $4, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v8i8_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB9_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movaps %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB9_2: # 
%identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v8i8_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB9_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vandnps %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB9_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <8 x i8> %m, splat (i8 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <8 x i8> %x, %a + ret <8 x i8> %masked + +identity: + ret <8 x i8> %x +} + +define <4 x i32> @and_sink_not_v4i32(<4 x i32> %x, <4 x i32> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v4i32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB10_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: notl %edi +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %esi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: jmp .LBB10_3 +; X86-NEXT: .LBB10_2: # %identity +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: .LBB10_3: # %identity +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v4i32: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: andl $-16, %esp +; X86-SSE-NEXT: subl $64, %esp +; X86-SSE-NEXT: movl 8(%ebp), %eax +; X86-SSE-NEXT: movl 24(%ebp), %ecx +; X86-SSE-NEXT: movl 20(%ebp), %edx +; X86-SSE-NEXT: movl 16(%ebp), %esi +; X86-SSE-NEXT: movzbl 44(%ebp), %ebx +; X86-SSE-NEXT: testb %bl, %bl +; X86-SSE-NEXT: movl 12(%ebp), %edi +; X86-SSE-NEXT: movups 28(%ebp), %xmm0 +; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB10_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl 16(%ebp), %edi +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl 20(%ebp), %edi +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl 24(%ebp), %edi +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edx, (%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} 
xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE-NEXT: andps %xmm0, %xmm1 +; X86-SSE-NEXT: movaps %xmm1, (%eax) +; X86-SSE-NEXT: jmp .LBB10_3 +; X86-SSE-NEXT: .LBB10_2: # %identity +; X86-SSE-NEXT: movl %edi, (%eax) +; X86-SSE-NEXT: movl %esi, 4(%eax) +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl %ecx, 12(%eax) +; X86-SSE-NEXT: .LBB10_3: # %identity +; X86-SSE-NEXT: leal -12(%ebp), %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v4i32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB10_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: movaps %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB10_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v4i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB10_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %edi, %ebx, %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %esi, %ebx, %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %edx, %ebx, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %ecx, %ebx, %ecx +; X86-BMI-NEXT: .LBB10_2: # %identity +; X86-BMI-NEXT: movl %ecx, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %esi, 8(%eax) +; X86-BMI-NEXT: movl %edi, 12(%eax) +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v4i32: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB10_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movaps %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB10_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v4i32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB10_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vandnps %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB10_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <4 x i32> %m, splat (i32 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i32> %a, %x + ret <4 x i32> %masked + +identity: + ret <4 x i32> %x +} + +define <4 x i32> @and_sink_not_v4i32_swapped(<4 x i32> %x, <4 x i32> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v4i32_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), 
%ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB11_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: .LBB11_2: # %identity +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v4i32_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: movl %esp, %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: andl $-16, %esp +; X86-SSE-NEXT: subl $64, %esp +; X86-SSE-NEXT: movl 8(%ebp), %eax +; X86-SSE-NEXT: movl 24(%ebp), %ecx +; X86-SSE-NEXT: movl 20(%ebp), %edx +; X86-SSE-NEXT: movl 16(%ebp), %esi +; X86-SSE-NEXT: movzbl 44(%ebp), %ebx +; X86-SSE-NEXT: testb %bl, %bl +; X86-SSE-NEXT: movl 12(%ebp), %edi +; X86-SSE-NEXT: movups 28(%ebp), %xmm0 +; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB11_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, (%esp) +; X86-SSE-NEXT: movl 16(%ebp), %ecx +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl 20(%ebp), %ecx +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl 24(%ebp), %ecx +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE-NEXT: andps %xmm0, %xmm1 +; X86-SSE-NEXT: movaps %xmm1, (%eax) +; X86-SSE-NEXT: jmp .LBB11_3 +; X86-SSE-NEXT: .LBB11_2: # %identity +; X86-SSE-NEXT: movl %edi, (%eax) +; X86-SSE-NEXT: movl %esi, 4(%eax) +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl %ecx, 12(%eax) +; X86-SSE-NEXT: .LBB11_3: # %identity +; X86-SSE-NEXT: leal -12(%ebp), %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; 
X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v4i32_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB11_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: movaps %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB11_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v4i32_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB11_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %edi, %ebx, %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %esi, %ebx, %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %edx, %ebx, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %ecx, %ebx, %ecx +; X86-BMI-NEXT: .LBB11_2: # %identity +; X86-BMI-NEXT: movl %ecx, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %esi, 8(%eax) +; X86-BMI-NEXT: movl %edi, 12(%eax) +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v4i32_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB11_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movaps %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB11_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v4i32_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB11_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vandnps %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB11_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <4 x i32> %m, splat (i32 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i32> %x, %a + ret <4 x i32> %masked + +identity: + ret <4 x i32> %x +} + +define <4 x i64> @and_sink_not_v4i64(<4 x i64> %x, <4 x i64> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v4i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: je .LBB12_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: notl %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %edx, %esi +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: notl %ecx +; X86-NEXT: andl %ebp, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: notl %ebp +; X86-NEXT: andl {{[0-9]+}}(%esp), %ebp +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: notl %edi +; X86-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, (%edx) +; X86-NEXT: movl %eax, 4(%edx) +; X86-NEXT: movl %edi, 8(%edx) +; X86-NEXT: movl %ebp, 12(%edx) +; X86-NEXT: movl %ecx, 16(%edx) +; X86-NEXT: movl %esi, 20(%edx) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%edx) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: jmp .LBB12_3 +; X86-NEXT: .LBB12_2: # %identity +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ebp, 16(%eax) +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl %edx, 24(%eax) +; X86-NEXT: movl %esi, 28(%eax) +; X86-NEXT: .LBB12_3: # %identity +; X86-NEXT: addl $8, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v4i64: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $8, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: je .LBB12_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: notl %edi +; X86-SSE-NEXT: andl %esi, %edi +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %edx, %esi +; X86-SSE-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %ecx, %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: notl %ecx +; X86-SSE-NEXT: andl %ebp, %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: notl %ebp +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: notl %edi +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl %eax, %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: notl %eax +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: notl %ebx +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl %ebx, (%edx) +; X86-SSE-NEXT: movl %eax, 4(%edx) +; X86-SSE-NEXT: movl %edi, 8(%edx) +; X86-SSE-NEXT: movl %ebp, 12(%edx) +; X86-SSE-NEXT: movl %ecx, 16(%edx) +; X86-SSE-NEXT: movl %esi, 20(%edx) +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 24(%edx) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 28(%edx) +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: jmp .LBB12_3 +; X86-SSE-NEXT: .LBB12_2: # %identity +; X86-SSE-NEXT: movl %ebx, (%eax) +; X86-SSE-NEXT: 
movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl %ebx, 4(%eax) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl %ebx, 8(%eax) +; X86-SSE-NEXT: movl %edi, 12(%eax) +; X86-SSE-NEXT: movl %ebp, 16(%eax) +; X86-SSE-NEXT: movl %ecx, 20(%eax) +; X86-SSE-NEXT: movl %edx, 24(%eax) +; X86-SSE-NEXT: movl %esi, 28(%eax) +; X86-SSE-NEXT: .LBB12_3: # %identity +; X86-SSE-NEXT: addl $8, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v4i64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: cmpb $0, 24(%ebp) +; X86-SSE2-NEXT: je .LBB12_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movaps 8(%ebp), %xmm3 +; X86-SSE2-NEXT: andnps %xmm0, %xmm2 +; X86-SSE2-NEXT: andnps %xmm1, %xmm3 +; X86-SSE2-NEXT: movaps %xmm2, %xmm0 +; X86-SSE2-NEXT: movaps %xmm3, %xmm1 +; X86-SSE2-NEXT: .LBB12_2: # %identity +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v4i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $8, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB12_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ebp, %esi, %esi +; X86-BMI-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ebx, %esi, %esi +; X86-BMI-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %edi, %esi, %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %esi +; X86-BMI-NEXT: movl %ecx, %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: andnl %ebx, %edx, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ebp, %ebp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ebx, %ebx +; X86-BMI-NEXT: movl %ebx, (%eax) +; X86-BMI-NEXT: movl %ebp, 4(%eax) +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %edi, 20(%eax) +; X86-BMI-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 24(%eax) +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 28(%eax) +; X86-BMI-NEXT: jmp .LBB12_3 +; X86-BMI-NEXT: .LBB12_2: # %identity +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, (%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %ecx, 8(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %edi, 20(%eax) +; X86-BMI-NEXT: movl %ebx, 24(%eax) +; X86-BMI-NEXT: movl %ebp, 28(%eax) +; X86-BMI-NEXT: .LBB12_3: # %identity +; 
X86-BMI-NEXT: addl $8, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v4i64: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB12_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm2 +; X64-NOAVX2-NEXT: andnps %xmm1, %xmm3 +; X64-NOAVX2-NEXT: movaps %xmm2, %xmm0 +; X64-NOAVX2-NEXT: movaps %xmm3, %xmm1 +; X64-NOAVX2-NEXT: .LBB12_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v4i64: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB12_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vandnps %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: .LBB12_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <4 x i64> %m, splat (i64 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i64> %a, %x + ret <4 x i64> %masked + +identity: + ret <4 x i64> %x +} + +define <4 x i64> @and_sink_not_v4i64_swapped(<4 x i64> %x, <4 x i64> %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_v4i64_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB13_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl %eax, (%ebx) +; X86-NEXT: movl %edx, 4(%ebx) +; X86-NEXT: movl %ecx, 8(%ebx) +; X86-NEXT: movl %ebp, 12(%ebx) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%ebx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%ebx) +; X86-NEXT: movl %edi, 24(%ebx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%ebx) +; X86-NEXT: jmp .LBB13_3 +; X86-NEXT: .LBB13_2: # %identity +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, (%ebx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, 4(%ebx) +; X86-NEXT: movl %ecx, 8(%ebx) +; X86-NEXT: movl %ebp, 12(%ebx) +; X86-NEXT: movl %eax, 16(%ebx) +; X86-NEXT: movl %edi, 20(%ebx) +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 24(%ebx) +; X86-NEXT: movl %esi, 28(%ebx) +; X86-NEXT: .LBB13_3: # %identity +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_v4i64_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB13_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl %esi, %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: movl %edi, %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: andl %esi, %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %eax +; X86-SSE-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: andl %esi, %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, %eax +; X86-SSE-NEXT: movl %eax, (%ebx) +; X86-SSE-NEXT: movl %edx, 4(%ebx) +; X86-SSE-NEXT: movl %ecx, 8(%ebx) +; X86-SSE-NEXT: movl %ebp, 12(%ebx) +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 16(%ebx) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 20(%ebx) +; X86-SSE-NEXT: movl %edi, 24(%ebx) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 28(%ebx) +; X86-SSE-NEXT: jmp .LBB13_3 +; X86-SSE-NEXT: .LBB13_2: # %identity +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl %edx, (%ebx) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl %edx, 4(%ebx) +; X86-SSE-NEXT: movl %ecx, 8(%ebx) +; X86-SSE-NEXT: movl %ebp, 12(%ebx) +; X86-SSE-NEXT: movl %eax, 16(%ebx) +; X86-SSE-NEXT: movl %edi, 20(%ebx) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 24(%ebx) +; X86-SSE-NEXT: movl %esi, 28(%ebx) +; X86-SSE-NEXT: .LBB13_3: # %identity +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_v4i64_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: cmpb $0, 
24(%ebp) +; X86-SSE2-NEXT: je .LBB13_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movaps 8(%ebp), %xmm3 +; X86-SSE2-NEXT: andnps %xmm0, %xmm2 +; X86-SSE2-NEXT: andnps %xmm1, %xmm3 +; X86-SSE2-NEXT: movaps %xmm2, %xmm0 +; X86-SSE2-NEXT: movaps %xmm3, %xmm1 +; X86-SSE2-NEXT: .LBB13_2: # %identity +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_v4i64_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $8, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB13_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ebp, %esi, %esi +; X86-BMI-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ebx, %esi, %esi +; X86-BMI-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %edi, %esi, %edi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %esi +; X86-BMI-NEXT: movl %ecx, %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: andnl %ebx, %edx, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ebp, %ebp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ebx, %ebx +; X86-BMI-NEXT: movl %ebx, (%eax) +; X86-BMI-NEXT: movl %ebp, 4(%eax) +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %edi, 20(%eax) +; X86-BMI-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 24(%eax) +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 28(%eax) +; X86-BMI-NEXT: jmp .LBB13_3 +; X86-BMI-NEXT: .LBB13_2: # %identity +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, (%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %ecx, 8(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %edi, 20(%eax) +; X86-BMI-NEXT: movl %ebx, 24(%eax) +; X86-BMI-NEXT: movl %ebp, 28(%eax) +; X86-BMI-NEXT: .LBB13_3: # %identity +; X86-BMI-NEXT: addl $8, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_v4i64_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %edi, %edi +; X64-NOAVX2-NEXT: je .LBB13_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: andnps %xmm0, %xmm2 +; X64-NOAVX2-NEXT: andnps %xmm1, %xmm3 +; X64-NOAVX2-NEXT: movaps %xmm2, %xmm0 +; X64-NOAVX2-NEXT: movaps %xmm3, %xmm1 +; X64-NOAVX2-NEXT: .LBB13_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_v4i64_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %edi, %edi +; X64-AVX2-NEXT: je .LBB13_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; 
X64-AVX2-NEXT: vandnps %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: .LBB13_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor <4 x i64> %m, splat (i64 -1) + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i64> %x, %a + ret <4 x i64> %masked + +identity: + ret <4 x i64> %x +} + +define <8 x i8> @and_sink_not_splat_v8i8(<8 x i8> %x, i8 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v8i8: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB14_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movb %dl, %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: notb %dl +; X86-NEXT: andb %dl, %ch +; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: andb %dl, %ch +; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: andb %dl, %ch +; X86-NEXT: andb %dl, %dh +; X86-NEXT: andb %dl, %bl +; X86-NEXT: andb %dl, %bh +; X86-NEXT: andb %dl, %cl +; X86-NEXT: andb {{[0-9]+}}(%esp), %dl +; X86-NEXT: movb %dl, (%eax) +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movb %bh, 2(%eax) +; X86-NEXT: movb %bl, 3(%eax) +; X86-NEXT: movb %dh, 4(%eax) +; X86-NEXT: movb %ch, 5(%eax) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 7(%eax) +; X86-NEXT: jmp .LBB14_3 +; X86-NEXT: .LBB14_2: # %identity +; X86-NEXT: movb %ch, (%eax) +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movb %bh, 2(%eax) +; X86-NEXT: movb %bl, 3(%eax) +; X86-NEXT: movb %dh, 4(%eax) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %cl, 5(%eax) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movb %dl, 7(%eax) +; X86-NEXT: .LBB14_3: # %identity +; X86-NEXT: addl $4, %esp +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_splat_v8i8: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB14_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movb %dl, %ch +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-SSE-NEXT: notb %dl +; X86-SSE-NEXT: andb %dl, %ch +; X86-SSE-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: andb %dl, %ch +; X86-SSE-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: andb %dl, %ch +; X86-SSE-NEXT: andb %dl, %dh +; X86-SSE-NEXT: andb %dl, %bl +; X86-SSE-NEXT: andb %dl, %bh +; X86-SSE-NEXT: andb %dl, %cl +; X86-SSE-NEXT: andb {{[0-9]+}}(%esp), %dl +; X86-SSE-NEXT: movb %dl, (%eax) +; X86-SSE-NEXT: movb %cl, 1(%eax) +; X86-SSE-NEXT: movb %bh, 2(%eax) +; X86-SSE-NEXT: movb %bl, 
3(%eax) +; X86-SSE-NEXT: movb %dh, 4(%eax) +; X86-SSE-NEXT: movb %ch, 5(%eax) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 6(%eax) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 7(%eax) +; X86-SSE-NEXT: jmp .LBB14_3 +; X86-SSE-NEXT: .LBB14_2: # %identity +; X86-SSE-NEXT: movb %ch, (%eax) +; X86-SSE-NEXT: movb %cl, 1(%eax) +; X86-SSE-NEXT: movb %bh, 2(%eax) +; X86-SSE-NEXT: movb %bl, 3(%eax) +; X86-SSE-NEXT: movb %dh, 4(%eax) +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb %cl, 5(%eax) +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb %cl, 6(%eax) +; X86-SSE-NEXT: movb %dl, 7(%eax) +; X86-SSE-NEXT: .LBB14_3: # %identity +; X86-SSE-NEXT: addl $4, %esp +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v8i8: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB14_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: notb %al +; X86-SSE2-NEXT: movzbl %al, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; X86-SSE2-NEXT: pand %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB14_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v8i8: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB14_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movb %dl, %ch +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI-NEXT: notb %dl +; X86-BMI-NEXT: andb %dl, %ch +; X86-BMI-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: andb %dl, %ch +; X86-BMI-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: andb %dl, %ch +; X86-BMI-NEXT: andb %dl, %dh +; X86-BMI-NEXT: andb %dl, %bl +; X86-BMI-NEXT: andb %dl, %bh +; X86-BMI-NEXT: andb %dl, %cl +; X86-BMI-NEXT: andb {{[0-9]+}}(%esp), %dl +; X86-BMI-NEXT: movb %dl, 1(%eax) +; X86-BMI-NEXT: movb %cl, (%eax) +; X86-BMI-NEXT: movb %bh, 2(%eax) +; X86-BMI-NEXT: movb %bl, 3(%eax) +; X86-BMI-NEXT: movb %dh, 4(%eax) +; X86-BMI-NEXT: movb %ch, 5(%eax) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 6(%eax) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 7(%eax) +; X86-BMI-NEXT: jmp .LBB14_3 +; X86-BMI-NEXT: .LBB14_2: # %identity +; X86-BMI-NEXT: movb %cl, (%eax) +; X86-BMI-NEXT: movb %ch, 1(%eax) +; X86-BMI-NEXT: movb %bh, 2(%eax) +; X86-BMI-NEXT: movb %bl, 3(%eax) +; X86-BMI-NEXT: movb %dh, 4(%eax) +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb %cl, 5(%eax) +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb %cl, 6(%eax) +; X86-BMI-NEXT: movb %dl, 7(%eax) +; X86-BMI-NEXT: .LBB14_3: # %identity +; X86-BMI-NEXT: addl $4, %esp +; 
X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v8i8: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB14_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: notb %dil +; X64-NOAVX2-NEXT: movzbl %dil, %eax +; X64-NOAVX2-NEXT: movd %eax, %xmm1 +; X64-NOAVX2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X64-NOAVX2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; X64-NOAVX2-NEXT: pand %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movdqa %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB14_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v8i8: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB14_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: notb %dil +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 +; X64-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB14_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i8 %m, -1 + %head = insertelement <8 x i8> poison, i8 %a, i8 0 + %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <8 x i8> %splat, %x + ret <8 x i8> %masked + +identity: + ret <8 x i8> %x +} + +define <8 x i8> @and_sink_not_splat_v8i8_swapped(<8 x i8> %x, i8 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v8i8_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB15_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: notb %ch +; X86-NEXT: andb %ch, %dh +; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: andb %ch, %dh +; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: andb %ch, %dh +; X86-NEXT: andb %ch, %bl +; X86-NEXT: andb %ch, %bh +; X86-NEXT: andb %ch, %cl +; X86-NEXT: andb %ch, %dl +; X86-NEXT: movb %dl, (%eax) +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movb %bh, 2(%eax) +; X86-NEXT: movb %bl, 3(%eax) +; X86-NEXT: movb %dh, 4(%eax) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 5(%eax) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb %cl, 7(%eax) +; X86-NEXT: jmp .LBB15_3 +; X86-NEXT: .LBB15_2: # %identity +; X86-NEXT: movb %dl, (%eax) +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movb %bh, 2(%eax) +; X86-NEXT: movb %bl, 3(%eax) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %cl, 4(%eax) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %cl, 5(%eax) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movb %dh, 7(%eax) +; X86-NEXT: .LBB15_3: # %identity +; X86-NEXT: addl $4, %esp +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: 
and_sink_not_splat_v8i8_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB15_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-SSE-NEXT: notb %ch +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE-NEXT: andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-SSE-NEXT: andb %ch, %dh +; X86-SSE-NEXT: andb %ch, %bl +; X86-SSE-NEXT: andb %ch, %bh +; X86-SSE-NEXT: andb %ch, %cl +; X86-SSE-NEXT: andb %ch, %dl +; X86-SSE-NEXT: movb %dl, (%eax) +; X86-SSE-NEXT: movb %cl, 1(%eax) +; X86-SSE-NEXT: movb %bh, 2(%eax) +; X86-SSE-NEXT: movb %bl, 3(%eax) +; X86-SSE-NEXT: movb %dh, 4(%eax) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 5(%eax) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 6(%eax) +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, 7(%eax) +; X86-SSE-NEXT: jmp .LBB15_3 +; X86-SSE-NEXT: .LBB15_2: # %identity +; X86-SSE-NEXT: movb %dl, (%eax) +; X86-SSE-NEXT: movb %cl, 1(%eax) +; X86-SSE-NEXT: movb %bh, 2(%eax) +; X86-SSE-NEXT: movb %bl, 3(%eax) +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb %cl, 4(%eax) +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb %cl, 5(%eax) +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movb %cl, 6(%eax) +; X86-SSE-NEXT: movb %dh, 7(%eax) +; X86-SSE-NEXT: .LBB15_3: # %identity +; X86-SSE-NEXT: addl $4, %esp +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v8i8_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB15_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: notb %al +; X86-SSE2-NEXT: movzbl %al, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; X86-SSE2-NEXT: pand %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB15_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v8i8_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %bh +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB15_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI-NEXT: notb %ch +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte 
Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-BMI-NEXT: andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-BMI-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-BMI-NEXT: andb %ch, %dh +; X86-BMI-NEXT: andb %ch, %bl +; X86-BMI-NEXT: andb %ch, %bh +; X86-BMI-NEXT: andb %ch, %dl +; X86-BMI-NEXT: andb %ch, %cl +; X86-BMI-NEXT: movb %cl, 1(%eax) +; X86-BMI-NEXT: movb %dl, (%eax) +; X86-BMI-NEXT: movb %bh, 2(%eax) +; X86-BMI-NEXT: movb %bl, 3(%eax) +; X86-BMI-NEXT: movb %dh, 4(%eax) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 5(%eax) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 6(%eax) +; X86-BMI-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-BMI-NEXT: movb %cl, 7(%eax) +; X86-BMI-NEXT: jmp .LBB15_3 +; X86-BMI-NEXT: .LBB15_2: # %identity +; X86-BMI-NEXT: movb %dl, (%eax) +; X86-BMI-NEXT: movb %cl, 1(%eax) +; X86-BMI-NEXT: movb %bh, 2(%eax) +; X86-BMI-NEXT: movb %bl, 3(%eax) +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb %cl, 4(%eax) +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb %cl, 5(%eax) +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movb %cl, 6(%eax) +; X86-BMI-NEXT: movb %dh, 7(%eax) +; X86-BMI-NEXT: .LBB15_3: # %identity +; X86-BMI-NEXT: addl $4, %esp +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v8i8_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB15_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: notb %dil +; X64-NOAVX2-NEXT: movzbl %dil, %eax +; X64-NOAVX2-NEXT: movd %eax, %xmm1 +; X64-NOAVX2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X64-NOAVX2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; X64-NOAVX2-NEXT: pand %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB15_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v8i8_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB15_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: notb %dil +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: .LBB15_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i8 %m, -1 + %head = insertelement <8 x i8> poison, i8 %a, i8 0 + %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <8 x i8> %x, %splat + ret <8 x i8> %masked + +identity: + ret <8 x i8> %x +} + +define <4 x i32> @and_sink_not_splat_v4i32(<4 x i32> %x, i32 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v4i32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB16_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %ecx +; X86-NEXT: andl %ebx, %edx +; 
X86-NEXT: andl %ebx, %esi +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: jmp .LBB16_3 +; X86-NEXT: .LBB16_2: # %identity +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: .LBB16_3: # %identity +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_splat_v4i32: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $32, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB16_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: notl %ebx +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, (%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE-NEXT: andps %xmm0, %xmm1 +; X86-SSE-NEXT: movaps %xmm1, (%eax) +; X86-SSE-NEXT: jmp .LBB16_3 +; X86-SSE-NEXT: .LBB16_2: # %identity +; X86-SSE-NEXT: movl %edi, (%eax) +; X86-SSE-NEXT: movl %esi, 4(%eax) +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl %ecx, 12(%eax) +; X86-SSE-NEXT: .LBB16_3: # %identity +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v4i32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB16_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB16_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v4i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; 
X86-BMI-NEXT: je .LBB16_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %ecx, %ebx, %ecx +; X86-BMI-NEXT: andnl %edx, %ebx, %edx +; X86-BMI-NEXT: andnl %esi, %ebx, %esi +; X86-BMI-NEXT: andnl %edi, %ebx, %edi +; X86-BMI-NEXT: .LBB16_2: # %identity +; X86-BMI-NEXT: movl %edi, (%eax) +; X86-BMI-NEXT: movl %esi, 4(%eax) +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v4i32: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB16_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: movd %edi, %xmm1 +; X64-NOAVX2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; X64-NOAVX2-NEXT: pandn %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movdqa %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB16_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v4i32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB16_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; X64-AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: .LBB16_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i32 %m, -1 + %head = insertelement <4 x i32> poison, i32 %a, i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i32> %splat, %x + ret <4 x i32> %masked + +identity: + ret <4 x i32> %x +} + +define <4 x i32> @and_sink_not_splat_v4i32_swapped(<4 x i32> %x, i32 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v4i32_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB17_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl %ebx, %ecx +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: andl %ebx, %esi +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: .LBB17_2: # %identity +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_splat_v4i32_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $32, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB17_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: notl %ebx +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, (%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE-NEXT: andps %xmm0, %xmm1 +; X86-SSE-NEXT: movaps %xmm1, (%eax) +; X86-SSE-NEXT: jmp .LBB17_3 +; X86-SSE-NEXT: .LBB17_2: # %identity +; X86-SSE-NEXT: movl %edi, (%eax) +; X86-SSE-NEXT: movl %esi, 4(%eax) +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl %ecx, 12(%eax) +; X86-SSE-NEXT: .LBB17_3: # %identity +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v4i32_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB17_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: .LBB17_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v4i32_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: je .LBB17_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: andnl %ecx, %ebx, %ecx +; X86-BMI-NEXT: andnl %edx, %ebx, %edx +; X86-BMI-NEXT: andnl %esi, %ebx, %esi +; X86-BMI-NEXT: andnl %edi, %ebx, %edi +; X86-BMI-NEXT: .LBB17_2: # %identity +; X86-BMI-NEXT: movl %edi, (%eax) +; X86-BMI-NEXT: movl %esi, 4(%eax) +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ecx, 12(%eax) +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v4i32_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB17_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: movd %edi, %xmm1 +; X64-NOAVX2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; X64-NOAVX2-NEXT: pandn %xmm0, %xmm1 +; X64-NOAVX2-NEXT: movdqa %xmm1, %xmm0 +; X64-NOAVX2-NEXT: .LBB17_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v4i32_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB17_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; X64-AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; X64-AVX2-NEXT: 
.LBB17_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i32 %m, -1 + %head = insertelement <4 x i32> poison, i32 %a, i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i32> %x, %splat + ret <4 x i32> %masked + +identity: + ret <4 x i32> %x +} + +define <4 x i64> @and_sink_not_splat_v4i64(<4 x i64> %x, i64 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v4i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB18_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: notl %ecx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: andl %ecx, %ebx +; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl %edx, (%esp) # 4-byte Folded Spill +; X86-NEXT: andl %edx, %edi +; X86-NEXT: andl %edx, %ebp +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %edi, 16(%eax) +; X86-NEXT: movl %esi, 20(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: jmp .LBB18_3 +; X86-NEXT: .LBB18_2: # %identity +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %edi, 16(%eax) +; X86-NEXT: movl %esi, 20(%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl %edx, 28(%eax) +; X86-NEXT: .LBB18_3: # %identity +; X86-NEXT: addl $8, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_splat_v4i64: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $8, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: je .LBB18_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: notl %ecx +; X86-SSE-NEXT: andl %ecx, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: andl %ecx, %esi +; X86-SSE-NEXT: andl %ecx, %ebx +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: 
notl %edx +; X86-SSE-NEXT: andl %edx, (%esp) # 4-byte Folded Spill +; X86-SSE-NEXT: andl %edx, %edi +; X86-SSE-NEXT: andl %edx, %ebp +; X86-SSE-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl %edx, (%eax) +; X86-SSE-NEXT: movl %ecx, 4(%eax) +; X86-SSE-NEXT: movl %ebp, 8(%eax) +; X86-SSE-NEXT: movl %ebx, 12(%eax) +; X86-SSE-NEXT: movl %edi, 16(%eax) +; X86-SSE-NEXT: movl %esi, 20(%eax) +; X86-SSE-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 24(%eax) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 28(%eax) +; X86-SSE-NEXT: jmp .LBB18_3 +; X86-SSE-NEXT: .LBB18_2: # %identity +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, (%eax) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 4(%eax) +; X86-SSE-NEXT: movl %ebp, 8(%eax) +; X86-SSE-NEXT: movl %ebx, 12(%eax) +; X86-SSE-NEXT: movl %edi, 16(%eax) +; X86-SSE-NEXT: movl %esi, 20(%eax) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 24(%eax) +; X86-SSE-NEXT: movl %edx, 28(%eax) +; X86-SSE-NEXT: .LBB18_3: # %identity +; X86-SSE-NEXT: addl $8, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v4i64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB18_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 +; X86-SSE2-NEXT: pandn %xmm0, %xmm3 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: .LBB18_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v4i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $8, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: je .LBB18_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ecx, %esi, %ecx +; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: andnl %ebx, %esi, %ecx +; X86-BMI-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI-NEXT: andnl %ebp, %esi, %ebp +; X86-BMI-NEXT: andnl %edx, %esi, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %ebx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %esi +; X86-BMI-NEXT: movl %esi, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %ecx, 8(%eax) +; X86-BMI-NEXT: movl %ebp, 12(%eax) +; X86-BMI-NEXT: movl %edi, 16(%eax) +; X86-BMI-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 20(%eax) +; X86-BMI-NEXT: movl %ebx, 24(%eax) +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-BMI-NEXT: jmp .LBB18_3 +; 
X86-BMI-NEXT: .LBB18_2: # %identity +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl %edi, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ebp, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %ebx, 20(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 24(%eax) +; X86-BMI-NEXT: .LBB18_3: # %identity +; X86-BMI-NEXT: movl %ecx, 28(%eax) +; X86-BMI-NEXT: addl $8, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v4i64: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB18_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: notq %rdi +; X64-NOAVX2-NEXT: movq %rdi, %xmm2 +; X64-NOAVX2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; X64-NOAVX2-NEXT: pand %xmm2, %xmm0 +; X64-NOAVX2-NEXT: pand %xmm2, %xmm1 +; X64-NOAVX2-NEXT: .LBB18_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v4i64: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB18_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vmovq %rdi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1 +; X64-AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: .LBB18_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i64 %m, -1 + %head = insertelement <4 x i64> poison, i64 %a, i64 0 + %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i64> %splat, %x + ret <4 x i64> %masked + +identity: + ret <4 x i64> %x +} + +define <4 x i64> @and_sink_not_splat_v4i64_swapped(<4 x i64> %x, i64 %m, i1 zeroext %cond) nounwind { +; X86-LABEL: and_sink_not_splat_v4i64_swapped: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: je .LBB19_2 +; X86-NEXT: # %bb.1: # %mask +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, (%esp) # 4-byte Folded Spill +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: notl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: andl %esi, %ebx +; X86-NEXT: andl %esi, %ebp +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ebx, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl %edi, 24(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: jmp 
.LBB19_3 +; X86-NEXT: .LBB19_2: # %identity +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl %edi, 20(%eax) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl %esi, 28(%eax) +; X86-NEXT: .LBB19_3: # %identity +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X86-SSE-LABEL: and_sink_not_splat_v4i64_swapped: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: je .LBB19_2 +; X86-SSE-NEXT: # %bb.1: # %mask +; X86-SSE-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: andl %esi, (%esp) # 4-byte Folded Spill +; X86-SSE-NEXT: andl %esi, %edi +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: andl %esi, %ebx +; X86-SSE-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: andl %esi, %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: notl %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: andl %esi, %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: andl %esi, %ebx +; X86-SSE-NEXT: andl %esi, %ebp +; X86-SSE-NEXT: andl %esi, %edx +; X86-SSE-NEXT: movl %edx, (%eax) +; X86-SSE-NEXT: movl %ecx, 4(%eax) +; X86-SSE-NEXT: movl %ebp, 8(%eax) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 12(%eax) +; X86-SSE-NEXT: movl %ebx, 16(%eax) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 20(%eax) +; X86-SSE-NEXT: movl %edi, 24(%eax) +; X86-SSE-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 28(%eax) +; X86-SSE-NEXT: jmp .LBB19_3 +; X86-SSE-NEXT: .LBB19_2: # %identity +; X86-SSE-NEXT: movl %edx, (%eax) +; X86-SSE-NEXT: movl %ecx, 4(%eax) +; X86-SSE-NEXT: movl %ebp, 8(%eax) +; X86-SSE-NEXT: movl %ebx, 12(%eax) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 16(%eax) +; X86-SSE-NEXT: movl %edi, 20(%eax) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 24(%eax) +; X86-SSE-NEXT: movl %esi, 28(%eax) +; X86-SSE-NEXT: .LBB19_3: # %identity +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl $4 +; +; X86-SSE2-LABEL: and_sink_not_splat_v4i64_swapped: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: je .LBB19_2 +; X86-SSE2-NEXT: # %bb.1: # %mask +; X86-SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 +; X86-SSE2-NEXT: pandn 
%xmm0, %xmm3 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: .LBB19_2: # %identity +; X86-SSE2-NEXT: retl +; +; X86-BMI-LABEL: and_sink_not_splat_v4i64_swapped: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $8, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: je .LBB19_2 +; X86-BMI-NEXT: # %bb.1: # %mask +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl %ecx, %esi, %ecx +; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: andnl %ebx, %esi, %ecx +; X86-BMI-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI-NEXT: andnl %ebp, %esi, %ebp +; X86-BMI-NEXT: andnl %edx, %esi, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %ebx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %esi +; X86-BMI-NEXT: movl %esi, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl %ecx, 8(%eax) +; X86-BMI-NEXT: movl %ebp, 12(%eax) +; X86-BMI-NEXT: movl %edi, 16(%eax) +; X86-BMI-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI-NEXT: movl %ecx, 20(%eax) +; X86-BMI-NEXT: movl %ebx, 24(%eax) +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-BMI-NEXT: jmp .LBB19_3 +; X86-BMI-NEXT: .LBB19_2: # %identity +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl %edi, (%eax) +; X86-BMI-NEXT: movl %edx, 4(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 8(%eax) +; X86-BMI-NEXT: movl %ebp, 12(%eax) +; X86-BMI-NEXT: movl %esi, 16(%eax) +; X86-BMI-NEXT: movl %ebx, 20(%eax) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %edx, 24(%eax) +; X86-BMI-NEXT: .LBB19_3: # %identity +; X86-BMI-NEXT: movl %ecx, 28(%eax) +; X86-BMI-NEXT: addl $8, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl $4 +; +; X64-NOAVX2-LABEL: and_sink_not_splat_v4i64_swapped: +; X64-NOAVX2: # %bb.0: +; X64-NOAVX2-NEXT: testl %esi, %esi +; X64-NOAVX2-NEXT: je .LBB19_2 +; X64-NOAVX2-NEXT: # %bb.1: # %mask +; X64-NOAVX2-NEXT: notq %rdi +; X64-NOAVX2-NEXT: movq %rdi, %xmm2 +; X64-NOAVX2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; X64-NOAVX2-NEXT: pand %xmm2, %xmm0 +; X64-NOAVX2-NEXT: pand %xmm2, %xmm1 +; X64-NOAVX2-NEXT: .LBB19_2: # %identity +; X64-NOAVX2-NEXT: retq +; +; X64-AVX2-LABEL: and_sink_not_splat_v4i64_swapped: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: testl %esi, %esi +; X64-AVX2-NEXT: je .LBB19_2 +; X64-AVX2-NEXT: # %bb.1: # %mask +; X64-AVX2-NEXT: vmovq %rdi, %xmm1 +; X64-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1 +; X64-AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: .LBB19_2: # %identity +; X64-AVX2-NEXT: retq + %a = xor i64 %m, -1 + %head = insertelement <4 x i64> poison, i64 %a, i64 0 + %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer + br i1 %cond, label %mask, label %identity + +mask: + %masked = and <4 x i64> %x, %splat + ret <4 x i64> %masked + +identity: + ret <4 
x i64> %x +}