aarch64: Add costs for one element of a scatter store
Currently each element in a gather load is costed as a scalar_load and each element in a scatter store is costed as a scalar_store. The load side seems to work pretty well in practice, since many CPU-specific costs give loads quite a high cost relative to arithmetic operations. However, stores usually have a cost of just 1, which means that scatters tend to appear too cheap.

This patch adds a separate cost for one element in a scatter store.

Like with the previous patches, this one only becomes active if a CPU selects use_new_vector_costs. It should therefore have a very low impact on other CPUs.

gcc/
	* config/aarch64/aarch64-protos.h (sve_vec_cost::scatter_store_elt_cost): New member variable.
	* config/aarch64/aarch64.c (generic_sve_vector_cost): Update accordingly, taking the cost from the cost of a scalar_store.
	(a64fx_sve_vector_cost): Likewise.
	(aarch64_detect_vector_stmt_subtype): Detect scatter stores.
This commit is contained in:
parent
d1ff0847b2
commit
7c679969ba
@ -256,12 +256,14 @@ struct sve_vec_cost : simd_vec_cost
|
||||
unsigned int clast_cost,
|
||||
unsigned int fadda_f16_cost,
|
||||
unsigned int fadda_f32_cost,
|
||||
unsigned int fadda_f64_cost)
|
||||
unsigned int fadda_f64_cost,
|
||||
unsigned int scatter_store_elt_cost)
|
||||
: simd_vec_cost (base),
|
||||
clast_cost (clast_cost),
|
||||
fadda_f16_cost (fadda_f16_cost),
|
||||
fadda_f32_cost (fadda_f32_cost),
|
||||
fadda_f64_cost (fadda_f64_cost)
|
||||
fadda_f64_cost (fadda_f64_cost),
|
||||
scatter_store_elt_cost (scatter_store_elt_cost)
|
||||
{}
|
||||
|
||||
/* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
|
||||
@ -274,6 +276,9 @@ struct sve_vec_cost : simd_vec_cost
|
||||
const int fadda_f16_cost;
|
||||
const int fadda_f32_cost;
|
||||
const int fadda_f64_cost;
|
||||
|
||||
/* The per-element cost of a scatter store. */
|
||||
const int scatter_store_elt_cost;
|
||||
};
|
||||
|
||||
/* Cost for vector insn classes. */
|
||||
|
||||
@ -638,7 +638,8 @@ static const sve_vec_cost generic_sve_vector_cost =
|
||||
2, /* clast_cost */
|
||||
2, /* fadda_f16_cost */
|
||||
2, /* fadda_f32_cost */
|
||||
2 /* fadda_f64_cost */
|
||||
2, /* fadda_f64_cost */
|
||||
1 /* scatter_store_elt_cost */
|
||||
};
|
||||
|
||||
/* Generic costs for vector insn classes. */
|
||||
@ -705,7 +706,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
|
||||
13, /* clast_cost */
|
||||
13, /* fadda_f16_cost */
|
||||
13, /* fadda_f32_cost */
|
||||
13 /* fadda_f64_cost */
|
||||
13, /* fadda_f64_cost */
|
||||
1 /* scatter_store_elt_cost */
|
||||
};
|
||||
|
||||
static const struct cpu_vector_cost a64fx_vector_cost =
|
||||
@ -14279,6 +14281,13 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
|
||||
&& DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
|
||||
return simd_costs->store_elt_extra_cost;
|
||||
|
||||
/* Detect cases in which a scalar_store is really storing one element
|
||||
in a scatter operation. */
|
||||
if (kind == scalar_store
|
||||
&& sve_costs
|
||||
&& STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
|
||||
return sve_costs->scatter_store_elt_cost;
|
||||
|
||||
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
|
||||
if (kind == vec_to_scalar
|
||||
&& where == vect_body
|
||||
|
||||
Loading…
Reference in New Issue
Block a user