aarch64: Fix PR99437 - tighten shift predicates for narrowing shift patterns

In this PR combine forms (R)SHRN(2) instructions with an invalid shift amount.
The intrinsic expanders for these patterns validate the right-shift amount, but if the
final patterns end up being matched by combine (or, I suppose, other RTL passes) they
still let an out-of-range const_vector through.

This patch tightens the predicates on the instructions involved by using per-mode
predicates that require the right-shift-amount const_vector to be a duplicated constant
within the valid range for the narrow element size.
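
For context (not part of the patch), a minimal C sketch of the distinction the new
predicates enforce; the function names below are made up, only the arm_neon.h
intrinsics are real:

#include <arm_neon.h>

/* In range: a 16-bit to 8-bit narrowing right shift by 1..8 maps directly
   to SHRN through the dedicated intrinsic.  */
uint8x8_t
narrow_ok (uint16x8_t a)
{
  return vshrn_n_u16 (a, 8);
}

/* Out of range for SHRN: shifting by 9 is fine as a plain 16-bit shift,
   but combine must not fuse it with the narrowing into a single SHRN,
   because 9 exceeds the 1..8 immediate range; the tightened predicates
   keep the fused pattern from matching.  */
uint8x8_t
narrow_keep_separate (uint16x8_t a)
{
  return vmovn_u16 (vshrq_n_u16 (a, 9));
}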

gcc/ChangeLog:

	PR target/99437
	* config/aarch64/predicates.md (aarch64_simd_shift_imm_vec_qi): Define.
	(aarch64_simd_shift_imm_vec_hi): Likewise.
	(aarch64_simd_shift_imm_vec_si): Likewise.
	(aarch64_simd_shift_imm_vec_di): Likewise.
	* config/aarch64/aarch64-simd.md (aarch64_shrn<mode>_insn_le): Use
	predicate from above.
	(aarch64_shrn<mode>_insn_be): Likewise.
	(aarch64_rshrn<mode>_insn_le): Likewise.
	(aarch64_rshrn<mode>_insn_be): Likewise.
	(aarch64_shrn2<mode>_insn_le): Likewise.
	(aarch64_shrn2<mode>_insn_be): Likewise.
	(aarch64_rshrn2<mode>_insn_le): Likewise.
	(aarch64_rshrn2<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

	PR target/99437
	* gcc.target/aarch64/simd/pr99437.c: New test.
Author: Kyrylo Tkachov
Date:   2021-03-08 15:05:21 +00:00
Commit: 0d9a70ea38 (parent 81fee43851)
3 changed files with 46 additions and 10 deletions

--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1738,7 +1738,7 @@
 	(vec_concat:<VNARROWQ2>
 	  (truncate:<VNARROWQ>
 	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
-	      (match_operand:VQN 2 "aarch64_simd_rshift_imm")))
+	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
 	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
@@ -1751,7 +1751,7 @@
 	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
 	  (truncate:<VNARROWQ>
 	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
-	      (match_operand:VQN 2 "aarch64_simd_rshift_imm")))))]
+	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1786,8 +1786,8 @@
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
-			      (match_operand:VQN 2 "aarch64_simd_rshift_imm")
-			     ] UNSPEC_RSHRN)
+			      (match_operand:VQN 2
+				"aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
 	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
@@ -1799,8 +1799,8 @@
 	(vec_concat:<VNARROWQ2>
 	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
 	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
-			      (match_operand:VQN 2 "aarch64_simd_rshift_imm")
-			     ] UNSPEC_RSHRN)))]
+			      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+			     UNSPEC_RSHRN)))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1836,7 +1836,7 @@
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
 	  (truncate:<VNARROWQ>
 	    (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-	      (match_operand:VQN 3 "aarch64_simd_rshift_imm")))))]
+	      (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1847,7 +1847,8 @@
 	(vec_concat:<VNARROWQ2>
 	  (truncate:<VNARROWQ>
 	    (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-	      (match_operand:VQN 3 "aarch64_simd_rshift_imm")))
+	      (match_operand:VQN 3
+		"aarch64_simd_shift_imm_vec_<vn_mode>")))
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
@@ -1878,7 +1879,8 @@
 	(vec_concat:<VNARROWQ2>
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
 	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-			      (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)))]
+			      (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+			     UNSPEC_RSHRN)))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1888,7 +1890,8 @@
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-			      (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)
+			      (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+			     UNSPEC_RSHRN)
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"

--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md

@@ -545,6 +545,22 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 1, 64)")))
 
+(define_predicate "aarch64_simd_shift_imm_vec_qi"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_hi"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 16)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_si"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 32)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_di"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))

--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99437.c

@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+uint8x16_t
+foo (uint16x8_t a, uint8x8_t b)
+{
+  return vcombine_u8 (vmovn_u16 (vshrq_n_u16 (a, 9)), b);
+}
+
+uint8x16_t
+foo2 (uint16x8_t a, uint8x8_t b)
+{
+  return vcombine_u8 (b, vmovn_u16 (vshrq_n_u16 (a, 15)));
+}