bswap: Handle vector CONSTRUCTORs [PR96239]
The following patch teaches the bswap pass to handle for small (2/4/8 byte long) vectors a CONSTRUCTOR by determining if the bytes of the constructor come from non-vector sources and are either nop or bswap and changing the CONSTRUCTOR in that case to VIEW_CONVERT_EXPR from scalar integer to the vector type. Unfortunately, as I found after the patch was written, due to pass ordering this doesn't really fix the original testcase, just the one I wrote, because both loop and slp vectorization is done only after the bswap pass. A possible way out of that would be to perform just this particular bswap optimization (i.e. for CONSTRUCTOR assignments with integral vector types call find_bswap_or_nop and bswap_replace if successful) also during the store merging pass, it isn't really a store, but the store merging pass already performs bswapping when handling store, so it wouldn't be that big hack. What do you think? 2020-12-16 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/96239 * gimple-ssa-store-merging.c (find_bswap_or_nop): Handle a vector CONSTRUCTOR. (bswap_replace): Likewise. * gcc.dg/pr96239.c: New test.
This commit is contained in:
parent
eece52b53b
commit
cd676dfa57
@ -865,7 +865,66 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
|
||||
gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
|
||||
|
||||
if (!ins_stmt)
|
||||
return NULL;
|
||||
{
|
||||
if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
|
||||
|| BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
|
||||
return NULL;
|
||||
unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
|
||||
if (sz != 16 && sz != 32 && sz != 64)
|
||||
return NULL;
|
||||
tree rhs = gimple_assign_rhs1 (stmt);
|
||||
tree eltype = TREE_TYPE (TREE_TYPE (rhs));
|
||||
unsigned HOST_WIDE_INT eltsz
|
||||
= int_size_in_bytes (eltype) * BITS_PER_UNIT;
|
||||
if (TYPE_PRECISION (eltype) != eltsz)
|
||||
return NULL;
|
||||
constructor_elt *elt;
|
||||
unsigned int i;
|
||||
tree type = build_nonstandard_integer_type (sz, 1);
|
||||
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
|
||||
{
|
||||
if (TREE_CODE (elt->value) != SSA_NAME
|
||||
|| !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
|
||||
return NULL;
|
||||
struct symbolic_number n1;
|
||||
gimple *source_stmt
|
||||
= find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
|
||||
limit - 1);
|
||||
|
||||
if (!source_stmt)
|
||||
return NULL;
|
||||
|
||||
n1.type = type;
|
||||
if (!n1.base_addr)
|
||||
n1.range = sz / BITS_PER_UNIT;
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
ins_stmt = source_stmt;
|
||||
*n = n1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (n->vuse != n1.vuse)
|
||||
return NULL;
|
||||
|
||||
struct symbolic_number n0 = *n;
|
||||
|
||||
if (!BYTES_BIG_ENDIAN)
|
||||
{
|
||||
if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
|
||||
return NULL;
|
||||
}
|
||||
else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
|
||||
return NULL;
|
||||
ins_stmt
|
||||
= perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
|
||||
|
||||
if (!ins_stmt)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t cmpxchg, cmpnop;
|
||||
find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
|
||||
@ -939,11 +998,18 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
|
||||
{
|
||||
tree src, tmp, tgt = NULL_TREE;
|
||||
gimple *bswap_stmt;
|
||||
tree_code conv_code = NOP_EXPR;
|
||||
|
||||
gimple *cur_stmt = gsi_stmt (gsi);
|
||||
src = n->src;
|
||||
if (cur_stmt)
|
||||
tgt = gimple_assign_lhs (cur_stmt);
|
||||
{
|
||||
tgt = gimple_assign_lhs (cur_stmt);
|
||||
if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
|
||||
&& tgt
|
||||
&& VECTOR_TYPE_P (TREE_TYPE (tgt)))
|
||||
conv_code = VIEW_CONVERT_EXPR;
|
||||
}
|
||||
|
||||
/* Need to load the value from memory first. */
|
||||
if (n->base_addr)
|
||||
@ -1031,7 +1097,9 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
|
||||
load_stmt = gimple_build_assign (val_tmp, val_expr);
|
||||
gimple_set_vuse (load_stmt, n->vuse);
|
||||
gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
|
||||
gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
|
||||
if (conv_code == VIEW_CONVERT_EXPR)
|
||||
val_tmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), val_tmp);
|
||||
gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
|
||||
update_stmt (cur_stmt);
|
||||
}
|
||||
else if (cur_stmt)
|
||||
@ -1073,7 +1141,9 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
|
||||
{
|
||||
if (!is_gimple_val (src))
|
||||
return NULL_TREE;
|
||||
g = gimple_build_assign (tgt, NOP_EXPR, src);
|
||||
if (conv_code == VIEW_CONVERT_EXPR)
|
||||
src = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), src);
|
||||
g = gimple_build_assign (tgt, conv_code, src);
|
||||
}
|
||||
else if (cur_stmt)
|
||||
g = gimple_build_assign (tgt, src);
|
||||
@ -1153,7 +1223,10 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
|
||||
gimple *convert_stmt;
|
||||
|
||||
tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
|
||||
convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
|
||||
tree atmp = tmp;
|
||||
if (conv_code == VIEW_CONVERT_EXPR)
|
||||
atmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), tmp);
|
||||
convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
|
||||
gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
@ -1258,6 +1331,14 @@ pass_optimize_bswap::execute (function *fun)
|
||||
/* Fall through. */
|
||||
case BIT_IOR_EXPR:
|
||||
break;
|
||||
case CONSTRUCTOR:
|
||||
{
|
||||
tree rhs = gimple_assign_rhs1 (cur_stmt);
|
||||
if (VECTOR_TYPE_P (TREE_TYPE (rhs))
|
||||
&& INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
54
gcc/testsuite/gcc.dg/pr96239.c
Normal file
54
gcc/testsuite/gcc.dg/pr96239.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* PR tree-optimization/96239 */
|
||||
/* { dg-do run { target { ilp32 || lp64 } } } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
/* { dg-final { scan-tree-dump-times " r>> 8;" 1 "optimized" { target bswap } } } */
|
||||
/* { dg-final { scan-tree-dump-times " = __builtin_bswap64 " 1 "optimized" { target bswap } } } */
|
||||
/* { dg-final { scan-tree-dump-not " >> \(8\|16\|24\|32\|40\|48\|56\);" "optimized" { target bswap } } } */
|
||||
|
||||
typedef unsigned char V __attribute__((vector_size (2)));
|
||||
typedef unsigned char W __attribute__((vector_size (8)));
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (unsigned short x, V *p)
|
||||
{
|
||||
*p = (V) { x >> 8, x };
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
bar (unsigned long long x, W *p)
|
||||
{
|
||||
*p = (W) { x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x };
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (unsigned short x, V *p)
|
||||
{
|
||||
*p = (V) { x, x >> 8 };
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
qux (unsigned long long x, W *p)
|
||||
{
|
||||
*p = (W) { x, x >> 8, x >> 16, x >> 24, x >> 32, x >> 40, x >> 48, x >> 56 };
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
V a, c, e, g;
|
||||
W b, d, f, h;
|
||||
foo (0xcafe, &a);
|
||||
bar (0xdeadbeefcafebabeULL, &b);
|
||||
baz (0xdead, &c);
|
||||
qux (0xfeedbac1beefdeadULL, &d);
|
||||
e = (V) { 0xca, 0xfe };
|
||||
f = (W) { 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0xba, 0xbe };
|
||||
g = (V) { 0xad, 0xde };
|
||||
h = (W) { 0xad, 0xde, 0xef, 0xbe, 0xc1, 0xba, 0xed, 0xfe };
|
||||
if (__builtin_memcmp (&a, &e, sizeof (V))
|
||||
|| __builtin_memcmp (&b, &f, sizeof (W))
|
||||
|| __builtin_memcmp (&c, &g, sizeof (V))
|
||||
|| __builtin_memcmp (&d, &h, sizeof (W)))
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user