gimple-isel: Fall back to using vcond_mask [PR98560]

PR98560 is about a case in which the vectoriser initially generates:

  mask_1 = a < 0;
  mask_2 = mask_1 & ...;
  res = VEC_COND_EXPR <mask_2, b, c>;

The vectoriser thus expects res to be calculated using vcond_mask.
However, we later manage to fold mask_2 to mask_1, leaving:

  mask_1 = a < 0;
  res = VEC_COND_EXPR <mask_1, b, c>;

gimple-isel then required a combined vcond to exist.

On most targets, it's not too onerous to provide all possible
(compare x select) combinations.  For each data mode, you just
need to provide unsigned comparisons, signed comparisons, and
floating-point comparisons, with the data mode and type of
comparison uniquely determining the mode of the compared values.
But for targets like SVE that support “unpacked” vectors,
it's not that simple: the level of unpacking adds another
degree of freedom.

Rather than insist that the combined versions exist, I think
we should be prepared to fall back to using separate comparisons
and vcond_masks.  I think that makes more sense on targets like
AArch64 and AArch32 in which compares and selects are fundamentally
separate operations anyway.

gcc/
	PR tree-optimization/98560
	* gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
	IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.

gcc/testsuite/
	PR tree-optimization/98560
	* gcc.dg/vect/pr98560-1.c: New test.
This commit is contained in:
Richard Sandiford 2021-01-07 15:00:38 +00:00
parent d54be5ad21
commit 78595e918e
2 changed files with 36 additions and 7 deletions

View File

@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
return gimple_build_assign (lhs, tem3);
}
bool can_compute_op0 = true;
gcc_assert (!COMPARISON_CLASS_P (op0));
if (TREE_CODE (op0) == SSA_NAME)
{
@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
tree op0_type = TREE_TYPE (op0);
tree op0a_type = TREE_TYPE (op0a);
if (TREE_CODE_CLASS (tcode) == tcc_comparison)
can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
tcode);
/* Try to fold x CMP y ? -1 : 0 to x CMP y. */
if (integer_minus_onep (op1)
if (can_compute_op0
&& integer_minus_onep (op1)
&& integer_zerop (op2)
&& TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
&& expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
&& TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
{
tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
gassign *new_stmt = gimple_build_assign (lhs, conv_op);
@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
return new_stmt;
}
if (used_vec_cond_exprs >= 2
if (can_compute_op0
&& used_vec_cond_exprs >= 2
&& (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
!= CODE_FOR_nothing)
&& expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
!= CODE_FOR_nothing))
{
/* Keep the SSA name and use vcond_mask. */
tcode = TREE_CODE (op0);
@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
}
}
gcc_assert (icode != CODE_FOR_nothing);
if (icode == CODE_FOR_nothing)
{
gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))
&& can_compute_op0
&& (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
!= CODE_FOR_nothing));
return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
}
tree tcode_tree = build_int_cst (integer_type_node, tcode);
return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
5, op0a, op0b, op1, op2, tcode_tree);

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -fno-tree-vrp -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fvect-cost-model=dynamic" } */
/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve } } */
#include <stdint.h>
/* Compile-only regression test for PR tree-optimization/98560.

   For each of the four elements, store (uint16_t) (src1[i] >> 1) to dst[i]
   when src2[i] is negative, and store 1 otherwise.  */
void
f (uint16_t *restrict dst, uint32_t *restrict src1, float *restrict src2)
{
/* I is kept separate from the loop counter J but advances in step with it,
   so it takes the values 0..3 inside the loop.  */
int i = 0;
for (int j = 0; j < 4; ++j)
{
/* The shifted 32-bit value is truncated to 16 bits.  */
uint16_t tmp = src1[i] >> 1;
/* The "i < 4" term is always true here.  NOTE(review): presumably it is
   present so that the vectoriser first builds a mask of the form
   (src2 < 0) & ..., which later folds back to the plain comparison as
   described in the PR; confirm before simplifying this expression.  */
dst[i] = (uint16_t) (src2[i] < 0 && i < 4 ? tmp : 1);
i += 1;
}
}