arm: Implement vceqq_p64, vceqz_p64 and vceqzq_p64 intrinsics
This patch adds implementations for vceqq_p64, vceqz_p64 and vceqzq_p64 intrinsics. vceqq_p64 uses the existing vceq_p64 after splitting the input vectors into their high and low halves. vceqz[q] simply call the vceq and vceqq with a second argument equal to zero. The added (executable) testcases make sure that the poly64x2_t variants have results with one element of all zeroes (false) and the other element with all bits set to one (true). 2021-01-15 Christophe Lyon <christophe.lyon@linaro.org> gcc/ PR target/71233 * config/arm/arm_neon.h (vceqz_p64, vceqq_p64, vceqzq_p64): New. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c: Add tests for vceqz_p64, vceqq_p64 and vceqzq_p64. * gcc.target/arm/simd/vceqz_p64.c: New test. * gcc.target/arm/simd/vceqzq_p64.c: New test.
This commit is contained in:
parent
f1d054017e
commit
63999d751d
@ -16912,6 +16912,37 @@ vceq_p64 (poly64x1_t __a, poly64x1_t __b)
|
||||
return vreinterpret_u64_u32 (__m);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vceqz_p64 (poly64x1_t __a)
|
||||
{
|
||||
poly64x1_t __b = vreinterpret_p64_u32 (vdup_n_u32 (0));
|
||||
return vceq_p64 (__a, __b);
|
||||
}
|
||||
|
||||
/* For vceqq_p64, we rely on vceq_p64 for each of the two elements. */
|
||||
__extension__ extern __inline uint64x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vceqq_p64 (poly64x2_t __a, poly64x2_t __b)
|
||||
{
|
||||
poly64_t __high_a = vget_high_p64 (__a);
|
||||
poly64_t __high_b = vget_high_p64 (__b);
|
||||
uint64x1_t __high = vceq_p64 (__high_a, __high_b);
|
||||
|
||||
poly64_t __low_a = vget_low_p64 (__a);
|
||||
poly64_t __low_b = vget_low_p64 (__b);
|
||||
uint64x1_t __low = vceq_p64 (__low_a, __low_b);
|
||||
return vcombine_u64 (__low, __high);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vceqzq_p64 (poly64x2_t __a)
|
||||
{
|
||||
poly64x2_t __b = vreinterpretq_p64_u32 (vdupq_n_u32 (0));
|
||||
return vceqq_p64 (__a, __b);
|
||||
}
|
||||
|
||||
/* The vtst_p64 intrinsic does not map to a single instruction.
|
||||
We emulate it in way similar to vceq_p64 above but here we do
|
||||
a reduction with max since if any two corresponding bits
|
||||
|
@ -16,6 +16,11 @@ VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
|
||||
|
||||
/* Expected results: vceq. */
|
||||
VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
|
||||
VECT_VAR_DECL(vceq_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
|
||||
|
||||
/* Expected results: vceqz. */
|
||||
VECT_VAR_DECL(vceqz_expected,uint,64,1) [] = { 0x0 };
|
||||
VECT_VAR_DECL(vceqz_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
|
||||
|
||||
/* Expected results: vcombine. */
|
||||
VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };
|
||||
@ -213,7 +218,7 @@ int main (void)
|
||||
|
||||
/* vceq_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VCEQ"
|
||||
#define TEST_MSG "VCEQ/VCEQQ"
|
||||
|
||||
#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \
|
||||
VECT_VAR(vceq_vector_res, T3, W, N) = \
|
||||
@ -227,16 +232,55 @@ int main (void)
|
||||
DECL_VARIABLE(vceq_vector, poly, 64, 1);
|
||||
DECL_VARIABLE(vceq_vector2, poly, 64, 1);
|
||||
DECL_VARIABLE(vceq_vector_res, uint, 64, 1);
|
||||
DECL_VARIABLE(vceq_vector, poly, 64, 2);
|
||||
DECL_VARIABLE(vceq_vector2, poly, 64, 2);
|
||||
DECL_VARIABLE(vceq_vector_res, uint, 64, 2);
|
||||
|
||||
CLEAN(result, uint, 64, 1);
|
||||
CLEAN(result, uint, 64, 2);
|
||||
|
||||
VLOAD(vceq_vector, buffer, , poly, p, 64, 1);
|
||||
VLOAD(vceq_vector, buffer, q, poly, p, 64, 2);
|
||||
|
||||
VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);
|
||||
VSET_LANE(vceq_vector2, q, poly, p, 64, 2, 0, 0x88);
|
||||
VSET_LANE(vceq_vector2, q, poly, p, 64, 2, 1, 0xFFFFFFFFFFFFFFF1);
|
||||
|
||||
TEST_VCOMP(vceq, , poly, p, uint, 64, 1);
|
||||
TEST_VCOMP(vceq, q, poly, p, uint, 64, 2);
|
||||
|
||||
CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");
|
||||
CHECK(TEST_MSG, uint, 64, 2, PRIx64, vceq_expected, "");
|
||||
|
||||
/* vceqz_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
#define TEST_MSG "VCEQZ/VCEQZQ"
|
||||
|
||||
#define TEST_VCOMPZ1(INSN, Q, T1, T2, T3, W, N) \
|
||||
VECT_VAR(vceqz_vector_res, T3, W, N) = \
|
||||
INSN##Q##_##T2##W(VECT_VAR(vceqz_vector, T1, W, N)); \
|
||||
vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceqz_vector_res, T3, W, N))
|
||||
|
||||
#define TEST_VCOMPZ(INSN, Q, T1, T2, T3, W, N) \
|
||||
TEST_VCOMPZ1(INSN, Q, T1, T2, T3, W, N)
|
||||
|
||||
DECL_VARIABLE(vceqz_vector, poly, 64, 1);
|
||||
DECL_VARIABLE(vceqz_vector_res, uint, 64, 1);
|
||||
DECL_VARIABLE(vceqz_vector, poly, 64, 2);
|
||||
DECL_VARIABLE(vceqz_vector_res, uint, 64, 2);
|
||||
|
||||
CLEAN(result, uint, 64, 1);
|
||||
CLEAN(result, uint, 64, 2);
|
||||
|
||||
VLOAD(vceqz_vector, buffer, , poly, p, 64, 1);
|
||||
VLOAD(vceqz_vector, buffer, q, poly, p, 64, 2);
|
||||
VSET_LANE(vceqz_vector, q, poly, p, 64, 2, 1, 0);
|
||||
|
||||
TEST_VCOMPZ(vceqz, , poly, p, uint, 64, 1);
|
||||
TEST_VCOMPZ(vceqz, q, poly, p, uint, 64, 2);
|
||||
|
||||
CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceqz_expected, "");
|
||||
CHECK(TEST_MSG, uint, 64, 2, PRIx64, vceqz_expected, "");
|
||||
|
||||
/* vcombine_p64 tests. */
|
||||
#undef TEST_MSG
|
||||
|
17
gcc/testsuite/gcc.target/arm/simd/vceqz_p64.c
Normal file
17
gcc/testsuite/gcc.target/arm/simd/vceqz_p64.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Test the `vceqz_p64' ARM Neon intrinsic. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
poly64x1_t v1;
|
||||
uint64x1_t result1;
|
||||
|
||||
void func()
|
||||
{
|
||||
result1 = vceqz_p64 (v1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vceq\.i32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 1 } } */
|
17
gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c
Normal file
17
gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Test the `vceqzq_p64' ARM Neon intrinsic. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
poly64x2_t v2;
|
||||
uint64x2_t result2;
|
||||
|
||||
void func()
|
||||
{
|
||||
result2 = vceqzq_p64 (v2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vceq\.i32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */
|
Loading…
Reference in New Issue
Block a user