Patch commentary (text before the "diff --git" line is not part of any hunk):
  * Adds aarch64_detect_scalar_stmt_subtype, which returns a cost of 0 when
    KIND is scalar_stmt and aarch64_extending_load_p holds for STMT_INFO,
    and otherwise returns the incoming STMT_COST unchanged.
  * In aarch64_add_stmt_cost, relaxes the guard from
    "stmt_info && vectype && aarch64_use_new_vector_costs_p ()" to
    "stmt_info && aarch64_use_new_vector_costs_p ()", always runs the new
    scalar check under that guard, and only calls
    aarch64_detect_vector_stmt_subtype when VECTYPE and costs->vec_flags
    are both nonzero.
NOTE(review): line breaks were restored from the hunk headers; indentation
and inter-sentence spacing follow GNU style and should be confirmed against
the target tree (or apply with whitespace-insensitive matching).

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 63750e38862..e2d92f0c136 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14492,6 +14492,23 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
   return 0;
 }
 
+/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
+   for STMT_INFO, which has cost kind KIND.  If this is a scalar operation,
+   try to subdivide the target-independent categorization provided by KIND
+   to get a more accurate cost.  */
+static unsigned int
+aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
+				    stmt_vec_info stmt_info,
+				    unsigned int stmt_cost)
+{
+  /* Detect an extension of a loaded value.  In general, we'll be able to fuse
+     the extension with the load.  */
+  if (kind == scalar_stmt && aarch64_extending_load_p (vinfo, stmt_info))
+    return 0;
+
+  return stmt_cost;
+}
+
 /* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
    for the vectorized form of STMT_INFO, which has cost kind KIND and which
    when vectorized would operate on vector type VECTYPE.  Try to subdivide
@@ -14702,10 +14719,16 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 
       /* Try to get a more accurate cost by looking at STMT_INFO instead
 	 of just looking at KIND.  */
-      if (stmt_info && vectype && aarch64_use_new_vector_costs_p ())
-	stmt_cost = aarch64_detect_vector_stmt_subtype (vinfo, kind,
-							stmt_info, vectype,
-							where, stmt_cost);
+      if (stmt_info && aarch64_use_new_vector_costs_p ())
+	{
+	  stmt_cost = aarch64_detect_scalar_stmt_subtype
+	    (vinfo, kind, stmt_info, stmt_cost);
+
+	  if (vectype && costs->vec_flags)
+	    stmt_cost = aarch64_detect_vector_stmt_subtype (vinfo, kind,
+							    stmt_info, vectype,
+							    where, stmt_cost);
+	}
 
       /* Do any SVE-specific adjustments to the cost.  */
       if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))