Skip to content

Conversation

@davemgreen
Copy link
Collaborator

It looks like marking the generated G_OR as disjoint is already handled for funnel shifts; we can do the same for the G_OR created when lowering G_ROTR and G_ROTL. This allows some more add-like ors to match.

It looks like marking the generated G_OR as disjoint is already handled for
funnel shifts; we can do the same for the G_OR created when lowering G_ROTR
and G_ROTL. This allows some more add-like ors to match.
@llvmbot
Copy link
Member

llvmbot commented Dec 15, 2025

@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-backend-risc-v

Author: David Green (davemgreen)

Changes

It looks like marking the generated G_OR as disjoint is already handled for funnel shifts; we can do the same for the G_OR created when lowering G_ROTR and G_ROTL. This allows some more add-like ors to match.


Patch is 39.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172317.diff

7 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/fsh.ll (+167-327)
  • (modified) llvm/test/CodeGen/AArch64/rax1.ll (+4-6)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir (+9-9)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv32.mir (+6-6)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-rotate-rv64.mir (+8-8)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 251ea4b1e019a..433107ff32753 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8078,7 +8078,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
     RevShiftVal =
         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
   }
-  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
+  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
   MI.eraseFromParent();
   return Legalized;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
index d2e8f15fced8e..2f0ac5ea76f2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
@@ -116,7 +116,7 @@ body:             |
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[AND]](<4 x s32>)
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SUB]], [[BUILD_VECTOR1]]
     ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[AND1]](<4 x s32>)
-    ; CHECK-NEXT: %rot:_(<4 x s32>) = G_OR [[SHL]], [[LSHR]]
+    ; CHECK-NEXT: %rot:_(<4 x s32>) = disjoint G_OR [[SHL]], [[LSHR]]
     ; CHECK-NEXT: $q0 = COPY %rot(<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
@@ -148,7 +148,7 @@ body:             |
     ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[AND]](<4 x s32>)
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SUB]], [[BUILD_VECTOR1]]
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[AND1]](<4 x s32>)
-    ; CHECK-NEXT: %rot:_(<4 x s32>) = G_OR [[LSHR]], [[SHL]]
+    ; CHECK-NEXT: %rot:_(<4 x s32>) = disjoint G_OR [[LSHR]], [[SHL]]
     ; CHECK-NEXT: $q0 = COPY %rot(<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 1db776ea6f616..eb2ca906a9cf2 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -3175,114 +3175,72 @@ entry:
 }
 
 define <8 x i8> @rotl_v8i8_c(<8 x i8> %a) {
-; CHECK-SD-LABEL: rotl_v8i8_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.8b, v0.8b, #3
-; CHECK-SD-NEXT:    usra v1.8b, v0.8b, #5
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v8i8_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.8b, v0.8b, #3
-; CHECK-GI-NEXT:    ushr v0.8b, v0.8b, #5
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v8i8_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.8b, v0.8b, #3
+; CHECK-NEXT:    usra v1.8b, v0.8b, #5
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <8 x i8> @llvm.fshl(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
   ret <8 x i8> %d
 }
 
 define <8 x i8> @rotr_v8i8_c(<8 x i8> %a) {
-; CHECK-SD-LABEL: rotr_v8i8_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.8b, v0.8b, #5
-; CHECK-SD-NEXT:    usra v1.8b, v0.8b, #3
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v8i8_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.8b, v0.8b, #3
-; CHECK-GI-NEXT:    shl v0.8b, v0.8b, #5
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v8i8_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.8b, v0.8b, #5
+; CHECK-NEXT:    usra v1.8b, v0.8b, #3
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <8 x i8> @llvm.fshr(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
   ret <8 x i8> %d
 }
 
 define <16 x i8> @rotl_v16i8_c(<16 x i8> %a) {
-; CHECK-SD-LABEL: rotl_v16i8_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.16b, v0.16b, #3
-; CHECK-SD-NEXT:    usra v1.16b, v0.16b, #5
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v16i8_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.16b, v0.16b, #3
-; CHECK-GI-NEXT:    ushr v0.16b, v0.16b, #5
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v16i8_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.16b, v0.16b, #3
+; CHECK-NEXT:    usra v1.16b, v0.16b, #5
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <16 x i8> @llvm.fshl(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
   ret <16 x i8> %d
 }
 
 define <16 x i8> @rotr_v16i8_c(<16 x i8> %a) {
-; CHECK-SD-LABEL: rotr_v16i8_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.16b, v0.16b, #5
-; CHECK-SD-NEXT:    usra v1.16b, v0.16b, #3
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v16i8_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.16b, v0.16b, #3
-; CHECK-GI-NEXT:    shl v0.16b, v0.16b, #5
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v16i8_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.16b, v0.16b, #5
+; CHECK-NEXT:    usra v1.16b, v0.16b, #3
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <16 x i8> @llvm.fshr(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
   ret <16 x i8> %d
 }
 
 define <4 x i16> @rotl_v4i16_c(<4 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v4i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.4h, v0.4h, #3
-; CHECK-SD-NEXT:    usra v1.4h, v0.4h, #13
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v4i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.4h, v0.4h, #3
-; CHECK-GI-NEXT:    ushr v0.4h, v0.4h, #13
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v4i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.4h, v0.4h, #3
+; CHECK-NEXT:    usra v1.4h, v0.4h, #13
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <4 x i16> @llvm.fshl(<4 x i16> %a, <4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
   ret <4 x i16> %d
 }
 
 define <4 x i16> @rotr_v4i16_c(<4 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v4i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.4h, v0.4h, #13
-; CHECK-SD-NEXT:    usra v1.4h, v0.4h, #3
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v4i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.4h, v0.4h, #3
-; CHECK-GI-NEXT:    shl v0.4h, v0.4h, #13
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v4i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.4h, v0.4h, #13
+; CHECK-NEXT:    usra v1.4h, v0.4h, #3
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <4 x i16> @llvm.fshr(<4 x i16> %a, <4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
   ret <4 x i16> %d
@@ -3369,164 +3327,102 @@ entry:
 }
 
 define <8 x i16> @rotl_v8i16_c(<8 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v8i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.8h, v0.8h, #3
-; CHECK-SD-NEXT:    usra v1.8h, v0.8h, #13
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v8i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.8h, v0.8h, #3
-; CHECK-GI-NEXT:    ushr v0.8h, v0.8h, #13
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v8i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.8h, v0.8h, #3
+; CHECK-NEXT:    usra v1.8h, v0.8h, #13
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <8 x i16> @llvm.fshl(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
   ret <8 x i16> %d
 }
 
 define <8 x i16> @rotr_v8i16_c(<8 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v8i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.8h, v0.8h, #13
-; CHECK-SD-NEXT:    usra v1.8h, v0.8h, #3
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v8i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.8h, v0.8h, #3
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #13
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v8i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.8h, v0.8h, #13
+; CHECK-NEXT:    usra v1.8h, v0.8h, #3
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <8 x i16> @llvm.fshr(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
   ret <8 x i16> %d
 }
 
 define <16 x i16> @rotl_v16i16_c(<16 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v16i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v2.8h, v0.8h, #3
-; CHECK-SD-NEXT:    shl v3.8h, v1.8h, #3
-; CHECK-SD-NEXT:    usra v2.8h, v0.8h, #13
-; CHECK-SD-NEXT:    usra v3.8h, v1.8h, #13
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    mov v1.16b, v3.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v16i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v2.8h, v0.8h, #3
-; CHECK-GI-NEXT:    shl v3.8h, v1.8h, #3
-; CHECK-GI-NEXT:    ushr v0.8h, v0.8h, #13
-; CHECK-GI-NEXT:    ushr v1.8h, v1.8h, #13
-; CHECK-GI-NEXT:    orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT:    orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v16i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v2.8h, v0.8h, #3
+; CHECK-NEXT:    shl v3.8h, v1.8h, #3
+; CHECK-NEXT:    usra v2.8h, v0.8h, #13
+; CHECK-NEXT:    usra v3.8h, v1.8h, #13
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <16 x i16> @llvm.fshl(<16 x i16> %a, <16 x i16> %a, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
   ret <16 x i16> %d
 }
 
 define <16 x i16> @rotr_v16i16_c(<16 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v16i16_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v2.8h, v0.8h, #13
-; CHECK-SD-NEXT:    shl v3.8h, v1.8h, #13
-; CHECK-SD-NEXT:    usra v2.8h, v0.8h, #3
-; CHECK-SD-NEXT:    usra v3.8h, v1.8h, #3
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    mov v1.16b, v3.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v16i16_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v2.8h, v0.8h, #3
-; CHECK-GI-NEXT:    ushr v3.8h, v1.8h, #3
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #13
-; CHECK-GI-NEXT:    shl v1.8h, v1.8h, #13
-; CHECK-GI-NEXT:    orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT:    orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v16i16_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v2.8h, v0.8h, #13
+; CHECK-NEXT:    shl v3.8h, v1.8h, #13
+; CHECK-NEXT:    usra v2.8h, v0.8h, #3
+; CHECK-NEXT:    usra v3.8h, v1.8h, #3
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <16 x i16> @llvm.fshr(<16 x i16> %a, <16 x i16> %a, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
   ret <16 x i16> %d
 }
 
 define <2 x i32> @rotl_v2i32_c(<2 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v2i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.2s, v0.2s, #3
-; CHECK-SD-NEXT:    usra v1.2s, v0.2s, #29
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v2i32_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.2s, v0.2s, #3
-; CHECK-GI-NEXT:    ushr v0.2s, v0.2s, #29
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v2i32_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.2s, v0.2s, #3
+; CHECK-NEXT:    usra v1.2s, v0.2s, #29
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <2 x i32> @llvm.fshl(<2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 3, i32 3>)
   ret <2 x i32> %d
 }
 
 define <2 x i32> @rotr_v2i32_c(<2 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v2i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.2s, v0.2s, #29
-; CHECK-SD-NEXT:    usra v1.2s, v0.2s, #3
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v2i32_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.2s, v0.2s, #3
-; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #29
-; CHECK-GI-NEXT:    orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v2i32_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.2s, v0.2s, #29
+; CHECK-NEXT:    usra v1.2s, v0.2s, #3
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %d = call <2 x i32> @llvm.fshr(<2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 3, i32 3>)
   ret <2 x i32> %d
 }
 
 define <4 x i32> @rotl_v4i32_c(<4 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v4i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.4s, v0.4s, #3
-; CHECK-SD-NEXT:    usra v1.4s, v0.4s, #29
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v4i32_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v1.4s, v0.4s, #3
-; CHECK-GI-NEXT:    ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v4i32_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.4s, v0.4s, #3
+; CHECK-NEXT:    usra v1.4s, v0.4s, #29
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <4 x i32> @llvm.fshl(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %d
 }
 
 define <4 x i32> @rotr_v4i32_c(<4 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v4i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v1.4s, v0.4s, #29
-; CHECK-SD-NEXT:    usra v1.4s, v0.4s, #3
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotr_v4i32_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushr v1.4s, v0.4s, #3
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #29
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotr_v4i32_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v1.4s, v0.4s, #29
+; CHECK-NEXT:    usra v1.4s, v0.4s, #3
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <4 x i32> @llvm.fshr(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %d
@@ -3558,34 +3454,33 @@ define <7 x i32> @rotl_v7i32_c(<7 x i32> %a) {
 ; CHECK-GI-LABEL: rotl_v7i32_c:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov s0, w0
-; CHECK-GI-NEXT:    fmov s1, w0
 ; CHECK-GI-NEXT:    mov w8, #29 // =0x1d
-; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    fmov s2, w0
+; CHECK-GI-NEXT:    fmov s1, w8
 ; CHECK-GI-NEXT:    mov w9, #3 // =0x3
 ; CHECK-GI-NEXT:    fmov s3, w4
 ; CHECK-GI-NEXT:    fmov s4, w4
 ; CHECK-GI-NEXT:    fmov s5, w9
-; CHECK-GI-NEXT:    mov v1.s[1], w1
 ; CHECK-GI-NEXT:    mov v0.s[1], w1
-; CHECK-GI-NEXT:    mov v2.s[1], w8
+; CHECK-GI-NEXT:    mov v2.s[1], w1
+; CHECK-GI-NEXT:    mov v1.s[1], w8
 ; CHECK-GI-NEXT:    mov v3.s[1], w5
 ; CHECK-GI-NEXT:    mov v4.s[1], w5
 ; CHECK-GI-NEXT:    mov v5.s[1], w9
-; CHECK-GI-NEXT:    mov v1.s[2], w2
 ; CHECK-GI-NEXT:    mov v0.s[2], w2
-; CHECK-GI-NEXT:    mov v2.s[2], w8
+; CHECK-GI-NEXT:    mov v2.s[2], w2
+; CHECK-GI-NEXT:    mov v1.s[2], w8
 ; CHECK-GI-NEXT:    mov v3.s[2], w6
 ; CHECK-GI-NEXT:    mov v4.s[2], w6
 ; CHECK-GI-NEXT:    mov v5.s[2], w9
-; CHECK-GI-NEXT:    mov v1.s[3], w3
 ; CHECK-GI-NEXT:    mov v0.s[3], w3
-; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    mov v2.s[3], w3
+; CHECK-GI-NEXT:    neg v1.4s, v1.4s
 ; CHECK-GI-NEXT:    ushl v4.4s, v4.4s, v5.4s
-; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #3
-; CHECK-GI-NEXT:    ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT:    ushl v2.4s, v3.4s, v2.4s
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    orr v1.16b, v4.16b, v2.16b
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #3
+; CHECK-GI-NEXT:    ushl v1.4s, v3.4s, v1.4s
+; CHECK-GI-NEXT:    usra v0.4s, v2.4s, #29
+; CHECK-GI-NEXT:    orr v1.16b, v4.16b, v1.16b
 ; CHECK-GI-NEXT:    mov s2, v0.s[1]
 ; CHECK-GI-NEXT:    mov s3, v0.s[2]
 ; CHECK-GI-NEXT:    mov s4, v0.s[3]
@@ -3630,34 +3525,33 @@ define <7 x i32> @rotr_v7i32_c(<7 x i32> %a) {
 ; CHECK-GI-LABEL: rotr_v7i32_c:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov s0, w0
-; CHECK-GI-NEXT:    fmov s1, w0
 ; CHECK-GI-NEXT:    mov w8, #3 // =0x3
-; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    fmov s2, w0
+; CHECK-GI-NEXT:    fmov s1, w8
 ; CHECK-GI-NEXT:    mov w9, #29 // =0x1d
 ; CHECK-GI-NEXT:    fmov s3, w4
 ; CHECK-GI-NEXT:    fmov s4, w4
 ; CHECK-GI-NEXT:    fmov s5, w9
-; CHECK-GI-NEXT:    mov v1.s[1], w1
 ; CHECK-GI-NEXT:    mov v0.s[1], w1
-; CHECK-GI-NEXT:    mov v2.s[1], w8
+; CHECK-GI-NEXT:    mov v2.s[1], w1
+; CHECK-GI-NEXT:    mov v1.s[1], w8
 ; CHECK-GI-NEXT:    mov v3.s[1], w5
 ; CHECK-GI-NEXT:    mov v4.s[1], w5
 ; CHECK-GI-NEXT:    mov v5.s[1], w9
-; CHECK-GI-NEXT:    mov v1.s[2], w2
 ; CHECK-GI-NEXT:    mov v0.s[2], w2
-; CHECK-GI-NEXT:    mov v2.s[2], w8
+; CHECK-GI-NEXT:    mov v2.s[2], w2
+; CHECK-GI-NEXT:    mov v1.s[2], w8
 ; CHECK-GI-NEXT:    mov v3.s[2], w6
 ; CHECK-GI-NEXT:    mov v4.s[2], w6
 ; CHECK-GI-NEXT:    mov v5.s[2], w9
-; CHECK-GI-NEXT:    mov v1.s[3], w3
 ; CHECK-GI-NEXT:    mov v0.s[3], w3
-; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    mov v2.s[3], w3
+; CHECK-GI-NEXT:    neg v1.4s, v1.4s
 ; CHECK-GI-NEXT:    ushl v3.4s, v3.4s, v5.4s
-; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #3
 ; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #29
-; CHECK-GI-NEXT:    ushl v2.4s, v4.4s, v2.4s
-; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    orr v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    ushl v1.4s, v4.4s, v1.4s
+; CHECK-GI-NEXT:    usra v0.4s, v2.4s, #3
+; CHECK-GI-NEXT:    orr v1.16b, v1.16b, v3.16b
 ; CHECK-GI-NEXT:    mov s2, v0.s[1]
 ; CHECK-GI-NEXT:    mov s3, v0.s[2]
 ; CHECK-GI-NEXT:    mov s4, v0.s[3]
@@ -3677,138 +3571,84 @@ entry:
 }
 
 define <8 x i32> @rotl_v8i32_c(<8 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v8i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v2.4s, v0.4s, #3
-; CHECK-SD-NEXT:    shl v3.4s, v1.4s, #3
-; CHECK-SD-NEXT:    usra v2.4s, v0.4s, #29
-; CHECK-SD-NEXT:    usra v3.4s, v1.4s, #29
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    mov v1.16b, v3.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: rotl_v8i32_c:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    shl v2.4s, v0.4s, #3
-; CHECK-GI-NEXT:    shl v3.4s, v1.4s, #3
-; CHECK-GI-NEXT:    ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #29
-; CHECK-GI-NEXT:    orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT:    orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: rotl_v8i32_c:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v2.4s, v0.4s, #3
+; CHECK-NEXT:    shl v3.4s, v1.4s, #3
+; CHECK-NEXT:    usra v2.4s, v0.4s, #29
+; CHECK-NEXT:    usra v3.4s, v1.4s, #29
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
 entry:
   %d = call <8 x i32> @llvm.fshl(<8 x i32> %a, <8 x i32> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>)
   ret <8 x i32> %d
 }
 
 define <8 x i32> @rotr_v8i32_c(<8 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v8i32_c:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v2.4s, v0.4s, #29
-; CHECK-SD-NEXT:    shl v3.4s, v1.4s, #29
-; CHECK-SD-NEXT:    usra v2.4s, v0.4s, #3
-; CHECK-SD-NEXT:    usra v3.4s, v1.4s, #3
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    mov v1.16b, v3.16b
-; CHECK-SD-N...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants