[AArch64][GlobalISel] Add disjoint to the G_OR when lowering G_ROTR/L #172317
Conversation
It looks like this is already handled for funnel shifts; we can do the same for the G_OR created when lowering G_ROTR and G_ROTL. This allows some more add-like-ors to match.
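To illustrate why the flag is valid here, a minimal sketch (plain C++, not part of the patch): the two shifted halves produced when a rotate is lowered never have set bits in common, so the G_OR that recombines them is disjoint and behaves like an add.

```c++
// Illustrative only: a plain C++ check of the property the `disjoint` flag
// encodes. For a rotate by amt (0 < amt < 32), the two shifted halves occupy
// non-overlapping bit positions, so OR-ing them is the same as adding them.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t amt = 1; amt < 32; ++amt) {
    for (uint32_t x : {0u, 1u, 0xdeadbeefu, 0xffffffffu, 0x80000001u}) {
      uint32_t hi = x << amt;        // G_SHL part of the lowered rotate
      uint32_t lo = x >> (32 - amt); // G_LSHR part of the lowered rotate
      assert((hi & lo) == 0);        // the operands are disjoint...
      assert((hi | lo) == hi + lo);  // ...so the G_OR behaves like a G_ADD
    }
  }
  return 0;
}
```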
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v

Author: David Green (davemgreen)

Changes:

It looks like this is already handled for funnel shifts; we can do the same for the G_OR created when lowering G_ROTR and G_ROTL. This allows some more add-like-ors to match.

Patch is 39.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172317.diff

7 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 251ea4b1e019a..433107ff32753 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8078,7 +8078,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
RevShiftVal =
MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
}
- MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
+ MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
index d2e8f15fced8e..2f0ac5ea76f2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rotr-rotl.mir
@@ -116,7 +116,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[AND]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SUB]], [[BUILD_VECTOR1]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[AND1]](<4 x s32>)
- ; CHECK-NEXT: %rot:_(<4 x s32>) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: %rot:_(<4 x s32>) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $q0 = COPY %rot(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
@@ -148,7 +148,7 @@ body: |
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[AND]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SUB]], [[BUILD_VECTOR1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[AND1]](<4 x s32>)
- ; CHECK-NEXT: %rot:_(<4 x s32>) = G_OR [[LSHR]], [[SHL]]
+ ; CHECK-NEXT: %rot:_(<4 x s32>) = disjoint G_OR [[LSHR]], [[SHL]]
; CHECK-NEXT: $q0 = COPY %rot(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 1db776ea6f616..eb2ca906a9cf2 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -3175,114 +3175,72 @@ entry:
}
define <8 x i8> @rotl_v8i8_c(<8 x i8> %a) {
-; CHECK-SD-LABEL: rotl_v8i8_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.8b, v0.8b, #3
-; CHECK-SD-NEXT: usra v1.8b, v0.8b, #5
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v8i8_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.8b, v0.8b, #3
-; CHECK-GI-NEXT: ushr v0.8b, v0.8b, #5
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v8i8_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.8b, v0.8b, #3
+; CHECK-NEXT: usra v1.8b, v0.8b, #5
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <8 x i8> @llvm.fshl(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
ret <8 x i8> %d
}
define <8 x i8> @rotr_v8i8_c(<8 x i8> %a) {
-; CHECK-SD-LABEL: rotr_v8i8_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.8b, v0.8b, #5
-; CHECK-SD-NEXT: usra v1.8b, v0.8b, #3
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v8i8_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.8b, v0.8b, #3
-; CHECK-GI-NEXT: shl v0.8b, v0.8b, #5
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v8i8_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.8b, v0.8b, #5
+; CHECK-NEXT: usra v1.8b, v0.8b, #3
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <8 x i8> @llvm.fshr(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
ret <8 x i8> %d
}
define <16 x i8> @rotl_v16i8_c(<16 x i8> %a) {
-; CHECK-SD-LABEL: rotl_v16i8_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.16b, v0.16b, #3
-; CHECK-SD-NEXT: usra v1.16b, v0.16b, #5
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v16i8_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.16b, v0.16b, #3
-; CHECK-GI-NEXT: ushr v0.16b, v0.16b, #5
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v16i8_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.16b, v0.16b, #3
+; CHECK-NEXT: usra v1.16b, v0.16b, #5
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <16 x i8> @llvm.fshl(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
ret <16 x i8> %d
}
define <16 x i8> @rotr_v16i8_c(<16 x i8> %a) {
-; CHECK-SD-LABEL: rotr_v16i8_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.16b, v0.16b, #5
-; CHECK-SD-NEXT: usra v1.16b, v0.16b, #3
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v16i8_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.16b, v0.16b, #3
-; CHECK-GI-NEXT: shl v0.16b, v0.16b, #5
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v16i8_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.16b, v0.16b, #5
+; CHECK-NEXT: usra v1.16b, v0.16b, #3
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <16 x i8> @llvm.fshr(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
ret <16 x i8> %d
}
define <4 x i16> @rotl_v4i16_c(<4 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v4i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.4h, v0.4h, #3
-; CHECK-SD-NEXT: usra v1.4h, v0.4h, #13
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v4i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.4h, v0.4h, #3
-; CHECK-GI-NEXT: ushr v0.4h, v0.4h, #13
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v4i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.4h, v0.4h, #3
+; CHECK-NEXT: usra v1.4h, v0.4h, #13
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <4 x i16> @llvm.fshl(<4 x i16> %a, <4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
ret <4 x i16> %d
}
define <4 x i16> @rotr_v4i16_c(<4 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v4i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.4h, v0.4h, #13
-; CHECK-SD-NEXT: usra v1.4h, v0.4h, #3
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v4i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.4h, v0.4h, #3
-; CHECK-GI-NEXT: shl v0.4h, v0.4h, #13
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v4i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.4h, v0.4h, #13
+; CHECK-NEXT: usra v1.4h, v0.4h, #3
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <4 x i16> @llvm.fshr(<4 x i16> %a, <4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
ret <4 x i16> %d
@@ -3369,164 +3327,102 @@ entry:
}
define <8 x i16> @rotl_v8i16_c(<8 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v8i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.8h, v0.8h, #3
-; CHECK-SD-NEXT: usra v1.8h, v0.8h, #13
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v8i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.8h, v0.8h, #3
-; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v8i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.8h, v0.8h, #3
+; CHECK-NEXT: usra v1.8h, v0.8h, #13
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <8 x i16> @llvm.fshl(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
ret <8 x i16> %d
}
define <8 x i16> @rotr_v8i16_c(<8 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v8i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.8h, v0.8h, #13
-; CHECK-SD-NEXT: usra v1.8h, v0.8h, #3
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v8i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #3
-; CHECK-GI-NEXT: shl v0.8h, v0.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v8i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.8h, v0.8h, #13
+; CHECK-NEXT: usra v1.8h, v0.8h, #3
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <8 x i16> @llvm.fshr(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
ret <8 x i16> %d
}
define <16 x i16> @rotl_v16i16_c(<16 x i16> %a) {
-; CHECK-SD-LABEL: rotl_v16i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v2.8h, v0.8h, #3
-; CHECK-SD-NEXT: shl v3.8h, v1.8h, #3
-; CHECK-SD-NEXT: usra v2.8h, v0.8h, #13
-; CHECK-SD-NEXT: usra v3.8h, v1.8h, #13
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v16i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v2.8h, v0.8h, #3
-; CHECK-GI-NEXT: shl v3.8h, v1.8h, #3
-; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #13
-; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT: orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v16i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v2.8h, v0.8h, #3
+; CHECK-NEXT: shl v3.8h, v1.8h, #3
+; CHECK-NEXT: usra v2.8h, v0.8h, #13
+; CHECK-NEXT: usra v3.8h, v1.8h, #13
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ret
entry:
%d = call <16 x i16> @llvm.fshl(<16 x i16> %a, <16 x i16> %a, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
ret <16 x i16> %d
}
define <16 x i16> @rotr_v16i16_c(<16 x i16> %a) {
-; CHECK-SD-LABEL: rotr_v16i16_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v2.8h, v0.8h, #13
-; CHECK-SD-NEXT: shl v3.8h, v1.8h, #13
-; CHECK-SD-NEXT: usra v2.8h, v0.8h, #3
-; CHECK-SD-NEXT: usra v3.8h, v1.8h, #3
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v16i16_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v2.8h, v0.8h, #3
-; CHECK-GI-NEXT: ushr v3.8h, v1.8h, #3
-; CHECK-GI-NEXT: shl v0.8h, v0.8h, #13
-; CHECK-GI-NEXT: shl v1.8h, v1.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT: orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v16i16_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v2.8h, v0.8h, #13
+; CHECK-NEXT: shl v3.8h, v1.8h, #13
+; CHECK-NEXT: usra v2.8h, v0.8h, #3
+; CHECK-NEXT: usra v3.8h, v1.8h, #3
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ret
entry:
%d = call <16 x i16> @llvm.fshr(<16 x i16> %a, <16 x i16> %a, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
ret <16 x i16> %d
}
define <2 x i32> @rotl_v2i32_c(<2 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v2i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.2s, v0.2s, #3
-; CHECK-SD-NEXT: usra v1.2s, v0.2s, #29
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v2i32_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.2s, v0.2s, #3
-; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #29
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v2i32_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.2s, v0.2s, #3
+; CHECK-NEXT: usra v1.2s, v0.2s, #29
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <2 x i32> @llvm.fshl(<2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 3, i32 3>)
ret <2 x i32> %d
}
define <2 x i32> @rotr_v2i32_c(<2 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v2i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.2s, v0.2s, #29
-; CHECK-SD-NEXT: usra v1.2s, v0.2s, #3
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v2i32_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.2s, v0.2s, #3
-; CHECK-GI-NEXT: shl v0.2s, v0.2s, #29
-; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v2i32_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.2s, v0.2s, #29
+; CHECK-NEXT: usra v1.2s, v0.2s, #3
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
entry:
%d = call <2 x i32> @llvm.fshr(<2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 3, i32 3>)
ret <2 x i32> %d
}
define <4 x i32> @rotl_v4i32_c(<4 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v4i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.4s, v0.4s, #3
-; CHECK-SD-NEXT: usra v1.4s, v0.4s, #29
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v4i32_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v1.4s, v0.4s, #3
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v4i32_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.4s, v0.4s, #3
+; CHECK-NEXT: usra v1.4s, v0.4s, #29
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <4 x i32> @llvm.fshl(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %d
}
define <4 x i32> @rotr_v4i32_c(<4 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v4i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v1.4s, v0.4s, #29
-; CHECK-SD-NEXT: usra v1.4s, v0.4s, #3
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_v4i32_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushr v1.4s, v0.4s, #3
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #29
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_v4i32_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v1.4s, v0.4s, #29
+; CHECK-NEXT: usra v1.4s, v0.4s, #3
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%d = call <4 x i32> @llvm.fshr(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %d
@@ -3558,34 +3454,33 @@ define <7 x i32> @rotl_v7i32_c(<7 x i32> %a) {
; CHECK-GI-LABEL: rotl_v7i32_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: mov w8, #29 // =0x1d
-; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: fmov s2, w0
+; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov w9, #3 // =0x3
; CHECK-GI-NEXT: fmov s3, w4
; CHECK-GI-NEXT: fmov s4, w4
; CHECK-GI-NEXT: fmov s5, w9
-; CHECK-GI-NEXT: mov v1.s[1], w1
; CHECK-GI-NEXT: mov v0.s[1], w1
-; CHECK-GI-NEXT: mov v2.s[1], w8
+; CHECK-GI-NEXT: mov v2.s[1], w1
+; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v3.s[1], w5
; CHECK-GI-NEXT: mov v4.s[1], w5
; CHECK-GI-NEXT: mov v5.s[1], w9
-; CHECK-GI-NEXT: mov v1.s[2], w2
; CHECK-GI-NEXT: mov v0.s[2], w2
-; CHECK-GI-NEXT: mov v2.s[2], w8
+; CHECK-GI-NEXT: mov v2.s[2], w2
+; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: mov v3.s[2], w6
; CHECK-GI-NEXT: mov v4.s[2], w6
; CHECK-GI-NEXT: mov v5.s[2], w9
-; CHECK-GI-NEXT: mov v1.s[3], w3
; CHECK-GI-NEXT: mov v0.s[3], w3
-; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: mov v2.s[3], w3
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: ushl v4.4s, v4.4s, v5.4s
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #3
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT: ushl v2.4s, v3.4s, v2.4s
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: orr v1.16b, v4.16b, v2.16b
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #3
+; CHECK-GI-NEXT: ushl v1.4s, v3.4s, v1.4s
+; CHECK-GI-NEXT: usra v0.4s, v2.4s, #29
+; CHECK-GI-NEXT: orr v1.16b, v4.16b, v1.16b
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov s3, v0.s[2]
; CHECK-GI-NEXT: mov s4, v0.s[3]
@@ -3630,34 +3525,33 @@ define <7 x i32> @rotr_v7i32_c(<7 x i32> %a) {
; CHECK-GI-LABEL: rotr_v7i32_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: mov w8, #3 // =0x3
-; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: fmov s2, w0
+; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov w9, #29 // =0x1d
; CHECK-GI-NEXT: fmov s3, w4
; CHECK-GI-NEXT: fmov s4, w4
; CHECK-GI-NEXT: fmov s5, w9
-; CHECK-GI-NEXT: mov v1.s[1], w1
; CHECK-GI-NEXT: mov v0.s[1], w1
-; CHECK-GI-NEXT: mov v2.s[1], w8
+; CHECK-GI-NEXT: mov v2.s[1], w1
+; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v3.s[1], w5
; CHECK-GI-NEXT: mov v4.s[1], w5
; CHECK-GI-NEXT: mov v5.s[1], w9
-; CHECK-GI-NEXT: mov v1.s[2], w2
; CHECK-GI-NEXT: mov v0.s[2], w2
-; CHECK-GI-NEXT: mov v2.s[2], w8
+; CHECK-GI-NEXT: mov v2.s[2], w2
+; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: mov v3.s[2], w6
; CHECK-GI-NEXT: mov v4.s[2], w6
; CHECK-GI-NEXT: mov v5.s[2], w9
-; CHECK-GI-NEXT: mov v1.s[3], w3
; CHECK-GI-NEXT: mov v0.s[3], w3
-; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: mov v2.s[3], w3
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: ushl v3.4s, v3.4s, v5.4s
-; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #3
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #29
-; CHECK-GI-NEXT: ushl v2.4s, v4.4s, v2.4s
-; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: orr v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT: ushl v1.4s, v4.4s, v1.4s
+; CHECK-GI-NEXT: usra v0.4s, v2.4s, #3
+; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov s3, v0.s[2]
; CHECK-GI-NEXT: mov s4, v0.s[3]
@@ -3677,138 +3571,84 @@ entry:
}
define <8 x i32> @rotl_v8i32_c(<8 x i32> %a) {
-; CHECK-SD-LABEL: rotl_v8i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v2.4s, v0.4s, #3
-; CHECK-SD-NEXT: shl v3.4s, v1.4s, #3
-; CHECK-SD-NEXT: usra v2.4s, v0.4s, #29
-; CHECK-SD-NEXT: usra v3.4s, v1.4s, #29
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotl_v8i32_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: shl v2.4s, v0.4s, #3
-; CHECK-GI-NEXT: shl v3.4s, v1.4s, #3
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #29
-; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #29
-; CHECK-GI-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-GI-NEXT: orr v1.16b, v3.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotl_v8i32_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v2.4s, v0.4s, #3
+; CHECK-NEXT: shl v3.4s, v1.4s, #3
+; CHECK-NEXT: usra v2.4s, v0.4s, #29
+; CHECK-NEXT: usra v3.4s, v1.4s, #29
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ret
entry:
%d = call <8 x i32> @llvm.fshl(<8 x i32> %a, <8 x i32> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>)
ret <8 x i32> %d
}
define <8 x i32> @rotr_v8i32_c(<8 x i32> %a) {
-; CHECK-SD-LABEL: rotr_v8i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v2.4s, v0.4s, #29
-; CHECK-SD-NEXT: shl v3.4s, v1.4s, #29
-; CHECK-SD-NEXT: usra v2.4s, v0.4s, #3
-; CHECK-SD-NEXT: usra v3.4s, v1.4s, #3
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-N...
[truncated]
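As the updated CHECK lines above show, the GlobalISel output for the constant-rotate tests now matches the SelectionDAG output (`shl` + `usra`): once the or is known disjoint, an accumulate (add) can stand in for the `orr`. A small sketch, plain C++ and not part of the patch, checking that equivalence for the byte-wise rotate-left-by-3 used in `rotl_v8i8_c`:

```c++
// Illustrative only: why the shl + usra (shift right and accumulate, i.e. add)
// sequence computes the same result as the old shl + ushr + orr sequence for
// an 8-bit rotate-left by 3.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = static_cast<uint8_t>(v);
    uint8_t viaOr = static_cast<uint8_t>((x << 3) | (x >> 5));  // shl + ushr + orr
    uint8_t viaAdd = static_cast<uint8_t>((x << 3) + (x >> 5)); // shl + usra
    assert(viaOr == viaAdd); // disjoint operands: or and add agree
  }
  return 0;
}
```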