
Conversation

pfusik (Contributor) commented Dec 15, 2025

Undoes a negation being hoisted out of a loop, so that it can be folded
into an inverted bitwise operation inside the loop.

Implements #108840 on X86
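
For context, a minimal IR sketch of the shape this targets (the function and value names are made up for illustration, not taken from the patch): LICM hoists the negation into the preheader while the `and` stays in the loop body, and sinking the `not` back next to the `and` lets ISel select a single ANDN per iteration on BMI targets.

define i32 @masked_sum(ptr %p, i32 %m, i32 %n) {
entry:
  ; The negation has been hoisted out of the loop (e.g. by LICM).
  %not = xor i32 %m, -1
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %gep = getelementptr inbounds i32, ptr %p, i32 %i
  %v = load i32, ptr %gep, align 4
  ; The 'and' stays in the loop; sinking %not down here lets the
  ; backend fold the pair into one ANDN instead of NOT + AND.
  %masked = and i32 %not, %v
  %acc.next = add i32 %acc, %masked
  %i.next = add i32 %i, 1
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc
}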

llvmbot (Member) commented Dec 15, 2025

@llvm/pr-subscribers-backend-x86

Author: Piotr Fusik (pfusik)

Changes

Undoes a negation being hoisted out of a loop, so that it can be folded
into an inverted bitwise operation inside the loop.

Implements #108840 on X86
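
The patch also recognizes a scalar negation that has been splatted into a vector mask outside the block that uses it. A minimal sketch of that shape (hypothetical names, mirroring the branch-based structure of the test file; SSE2 assumed so the backend can form PANDN):

define <4 x i32> @masked_vec(<4 x i32> %x, i32 %m, i1 %cond) {
entry:
  ; Scalar 'not' and its splat, as they would sit outside the loop or
  ; block that actually applies the mask.
  %not = xor i32 %m, -1
  %ins = insertelement <4 x i32> poison, i32 %not, i64 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  br i1 %cond, label %mask, label %identity

mask:
  ; Sinking %not, %ins and %splat next to this 'and' lets the backend
  ; select PANDN instead of a separate NOT-splat followed by PAND.
  %masked = and <4 x i32> %splat, %x
  ret <4 x i32> %masked

identity:
  ret <4 x i32> %x
}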


Patch is 117.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172329.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+22)
  • (added) llvm/test/CodeGen/X86/andnot-sink-not.ll (+3168)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9fb97918cb71a..b90232b25088b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7198,6 +7198,28 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
                                             SmallVectorImpl<Use *> &Ops) const {
   using namespace llvm::PatternMatch;
 
+  if (I->getOpcode() == Instruction::And &&
+      (I->getType()->isVectorTy() ? ST->hasSSE2() : ST->hasBMI())) {
+    for (auto &Op : I->operands()) {
+      // (and X, (not Y)) -> (andn X, Y)
+      if (match(Op.get(), m_Not(m_Value()))) {
+        Ops.push_back(&Op);
+        return true;
+      }
+      // (and X, (splat (not Y))) -> (andn X, (splat Y))
+      if (match(Op.get(),
+                m_Shuffle(m_InsertElt(m_Value(), m_Not(m_Value()), m_ZeroInt()),
+                          m_Value(), m_ZeroMask()))) {
+        Use &InsertElt = cast<Instruction>(Op)->getOperandUse(0);
+        Use &Not = cast<Instruction>(InsertElt)->getOperandUse(1);
+        Ops.push_back(&Not);
+        Ops.push_back(&InsertElt);
+        Ops.push_back(&Op);
+        return true;
+      }
+    }
+  }
+
   FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
   if (!VTy)
     return false;
diff --git a/llvm/test/CodeGen/X86/andnot-sink-not.ll b/llvm/test/CodeGen/X86/andnot-sink-not.ll
new file mode 100644
index 0000000000000..8463b8ae4e750
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andnot-sink-not.ll
@@ -0,0 +1,3168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi | FileCheck %s --check-prefixes=X86-NOBMI,X86
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -mattr=+bmi | FileCheck %s --check-prefixes=X86-BMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOAVX2,X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI,X64-NOAVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi,+avx2 | FileCheck %s --check-prefixes=X64,X64-BMI,X64-AVX2
+
+define i8 @and_sink_not_i8(i8 %x, i8 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i8:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB0_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notb %cl
+; X86-NOBMI-NEXT:    andb %al, %cl
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB0_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i8:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB0_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notb %cl
+; X86-BMI-NEXT:    andb %al, %cl
+; X86-BMI-NEXT:    movl %ecx, %eax
+; X86-BMI-NEXT:  .LBB0_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notb %sil
+; X64-NEXT:    andb %dil, %sil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB0_2: # %identity
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  %a = xor i8 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i8 %a, %x
+  ret i8 %masked
+
+identity:
+  ret i8 %x
+}
+
+define i8 @and_sink_not_i8_swapped(i8 %x, i8 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i8_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB1_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notb %cl
+; X86-NOBMI-NEXT:    andb %cl, %al
+; X86-NOBMI-NEXT:  .LBB1_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i8_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB1_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notb %cl
+; X86-BMI-NEXT:    andb %cl, %al
+; X86-BMI-NEXT:  .LBB1_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i8_swapped:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB1_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notb %sil
+; X64-NEXT:    andb %sil, %al
+; X64-NEXT:  .LBB1_2: # %identity
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = xor i8 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i8 %x, %a
+  ret i8 %masked
+
+identity:
+  ret i8 %x
+}
+
+define i16 @and_sink_not_i16(i16 %x, i16 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i16:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB2_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    retl
+; X86-NOBMI-NEXT:  .LBB2_2: # %identity
+; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i16:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB2_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI-NEXT:  .LBB2_2: # %identity
+; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB2_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    andl %edi, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+; X64-NOBMI-NEXT:  .LBB2_2: # %identity
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB2_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB2_2: # %identity
+; X64-BMI-NEXT:    movl %edi, %eax
+; X64-BMI-NEXT:    retq
+  %a = xor i16 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i16 %a, %x
+  ret i16 %masked
+
+identity:
+  ret i16 %x
+}
+
+define i16 @and_sink_not_i16_swapped(i16 %x, i16 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i16_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB3_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB3_2: # %identity
+; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i16_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB3_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI-NEXT:  .LBB3_2: # %identity
+; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i16_swapped:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB3_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:  .LBB3_2: # %identity
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i16_swapped:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB3_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB3_2: # %identity
+; X64-BMI-NEXT:    movl %edi, %eax
+; X64-BMI-NEXT:    retq
+  %a = xor i16 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i16 %x, %a
+  ret i16 %masked
+
+identity:
+  ret i16 %x
+}
+
+define i32 @and_sink_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB4_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB4_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB4_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI-NEXT:  .LBB4_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB4_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    andl %edi, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+; X64-NOBMI-NEXT:  .LBB4_2: # %identity
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB4_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB4_2: # %identity
+; X64-BMI-NEXT:    movl %edi, %eax
+; X64-BMI-NEXT:    retq
+  %a = xor i32 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i32 %a, %x
+  ret i32 %masked
+
+identity:
+  ret i32 %x
+}
+
+define i32 @and_sink_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; X86-NOBMI-LABEL: and_sink_not_i32_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB5_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB5_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i32_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB5_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI-NEXT:  .LBB5_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i32_swapped:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB5_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:  .LBB5_2: # %identity
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i32_swapped:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB5_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB5_2: # %identity
+; X64-BMI-NEXT:    movl %edi, %eax
+; X64-BMI-NEXT:    retq
+  %a = xor i32 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i32 %x, %a
+  ret i32 %masked
+
+identity:
+  ret i32 %x
+}
+
+define i64 @and_sink_not_i64(i64 %x, i64 %m, i1 zeroext %cond) nounwind {
+; X86-NOBMI-LABEL: and_sink_not_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB6_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    notl %esi
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:  .LBB6_2: # %identity
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB6_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    andnl %eax, %esi, %eax
+; X86-BMI-NEXT:    andnl %edx, %ecx, %edx
+; X86-BMI-NEXT:  .LBB6_2: # %identity
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB6_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notq %rsi
+; X64-NOBMI-NEXT:    andq %rdi, %rsi
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    retq
+; X64-NOBMI-NEXT:  .LBB6_2: # %identity
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB6_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB6_2: # %identity
+; X64-BMI-NEXT:    movq %rdi, %rax
+; X64-BMI-NEXT:    retq
+  %a = xor i64 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i64 %a, %x
+  ret i64 %masked
+
+identity:
+  ret i64 %x
+}
+
+define i64 @and_sink_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) nounwind {
+; X86-NOBMI-LABEL: and_sink_not_i64_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB7_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    notl %esi
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:    andl %esi, %edx
+; X86-NOBMI-NEXT:  .LBB7_2: # %identity
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i64_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB7_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    andnl %eax, %esi, %eax
+; X86-BMI-NEXT:    andnl %edx, %ecx, %edx
+; X86-BMI-NEXT:  .LBB7_2: # %identity
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: and_sink_not_i64_swapped:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    testl %edx, %edx
+; X64-NOBMI-NEXT:    je .LBB7_2
+; X64-NOBMI-NEXT:  # %bb.1: # %mask
+; X64-NOBMI-NEXT:    notq %rsi
+; X64-NOBMI-NEXT:    andq %rsi, %rax
+; X64-NOBMI-NEXT:  .LBB7_2: # %identity
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: and_sink_not_i64_swapped:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    testl %edx, %edx
+; X64-BMI-NEXT:    je .LBB7_2
+; X64-BMI-NEXT:  # %bb.1: # %mask
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
+; X64-BMI-NEXT:  .LBB7_2: # %identity
+; X64-BMI-NEXT:    movq %rdi, %rax
+; X64-BMI-NEXT:    retq
+  %a = xor i64 %m, -1
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i64 %x, %a
+  ret i64 %masked
+
+identity:
+  ret i64 %x
+}
+
+define <8 x i8> @and_sink_not_v8i8(<8 x i8> %x, <8 x i8> %m, i1 zeroext %cond) nounwind {
+; X86-LABEL: and_sink_not_v8i8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB8_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    notb %dh
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %dl, %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    notb %dl
+; X86-NEXT:    andb %cl, %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    notb %cl
+; X86-NEXT:    andb %bh, %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    notb %bh
+; X86-NEXT:    andb %bl, %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    notb %bl
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    notb %al
+; X86-NEXT:    andb %ah, %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    notb %ah
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb %ah, (%esi)
+; X86-NEXT:    movb %al, 1(%esi)
+; X86-NEXT:    movb %bl, 2(%esi)
+; X86-NEXT:    movb %bh, 3(%esi)
+; X86-NEXT:    movb %cl, 4(%esi)
+; X86-NEXT:    movb %dl, 5(%esi)
+; X86-NEXT:    movb %ch, 6(%esi)
+; X86-NEXT:    movb %dh, 7(%esi)
+; X86-NEXT:    jmp .LBB8_3
+; X86-NEXT:  .LBB8_2: # %identity
+; X86-NEXT:    movb %al, (%esi)
+; X86-NEXT:    movb %ah, 1(%esi)
+; X86-NEXT:    movb %dh, 2(%esi)
+; X86-NEXT:    movb %bl, 3(%esi)
+; X86-NEXT:    movb %bh, 4(%esi)
+; X86-NEXT:    movb %cl, 5(%esi)
+; X86-NEXT:    movb %dl, 6(%esi)
+; X86-NEXT:    movb %ch, 7(%esi)
+; X86-NEXT:  .LBB8_3: # %identity
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB8_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    notb %dh
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X8...
[truncated]

