This patch rewrites the old VEC macro-based interface into a new one based on the template class 'vec'. The user-visible changes are described in http://gcc.gnu.org/wiki/cxx-conversion/cxx-vec. I have tested the patch pretty extensively: - Regular bootstraps on x86_64, ppc, ia64, sparc and hppa. - Bootstraps with --enable-checking=release - Bootstraps with --enable-checking=gc,gcac - Basic builds on all targets (using contrib/config-list.mk). We no longer access the vectors via VEC_* macros. The pattern is "VEC_operation (T, A, V, args)" becomes "V.operation (args)". The only thing I could not do is create proper ctors and dtors for the vec class. Since these vectors are stored in unions, we have to keep them as PODs (C++03 does not allow non-PODs in unions). This means that creation and destruction must be explicit. There is a new method vec<type, allocation, layout>::create() and another vec<type, allocation, layout>::destroy() to allocate the internal vector. For vectors that must be pointers, there is a family of free functions that implement the operations that need to tolerate NULL vectors. These functions all start with the prefix 'vec_safe_'. See the wiki page for details. The gengtype change removes the special handling for VEC() that used to exist in gengtype. Additionally, it allows gengtype to recognize templates of more than one argument and introduces the concept of an undefined type (useful for template arguments that may or may not be types). When a TYPE_UNDEFINED is reached, gengtype will ignore it if it happens inside a type marked with GTY((user)). Otherwise, it will emit an error. Finally, gengtype rejects root types marked GTY((user)) that are not first class pointers. 2012-11-16 Diego Novillo <dnovillo@google.com> VEC API overhaul (http://gcc.gnu.org/wiki/cxx-conversion/cxx-vec) * vec.c (register_overhead): Convert it into member function of vec_prefix. (release_overhead): Likewise. (calculate_allocation): Likewise. (vec_heap_free): Remove. 
(vec_gc_o_reserve_1): Remove. (vec_heap_o_reserve_1): Remove. (vec_stack_o_reserve_1): Remove. (vec_stack_o_reserve_exact): Remove. (register_stack_vec): New. (stack_vec_register_index): New. (unregister_stack_vec): New. (vec_assert_fail): Remove. * vec.h: Conditionally include ggc.h. Document conditional hackery. Update top-level documentation. (ALONE_VEC_CHECK_INFO): Remove. (VEC_CHECK_INFO): Remove. (ALONE_VEC_CHECK_DECL): Remove. (VEC_CHECK_DECL): Remove. (ALONE_VEC_CHECK_PASS): Remove. (VEC_CHECK_PASS): Remove. (VEC_ASSERT): Remove. (vec_prefix): Add friends va_gc, va_gc_atomic, va_heap and va_stack. Mark fields alloc_ and num_ as protected. (struct vec_t): Remove. Remove all function members. (struct vl_embed): Declare. (struct vl_ptr): Declare. (free): Remove. (reserve_exact): Remove. (reserve): Remove. (safe_splice): Remove. (safe_push): Remove. (safe_grow): Remove. (safe_grow_cleared): Remove. (safe_insert): Remove. (DEF_VEC_I): Remove. (DEF_VEC_ALLOC_I): Remove. (DEF_VEC_P): Remove. (DEF_VEC_ALLOC_P): Remove. (DEF_VEC_O): Remove. (DEF_VEC_ALLOC_O): Remove. (DEF_VEC_ALLOC_P_STACK): Remove. (DEF_VEC_ALLOC_O_STACK): Remove. (DEF_VEC_ALLOC_I_STACK): Remove. (DEF_VEC_A): Remove. (DEF_VEC_ALLOC_A): Remove. (vec_stack_p_reserve_exact_1): Remove. (vec_stack_o_reserve): Remove. (vec_stack_o_reserve_exact): Remove. (VEC_length): Remove. (VEC_empty): Remove. (VEC_address): Remove. (vec_address): Remove. (VEC_last): Remove. (VEC_index): Remove. (VEC_iterate): Remove. (VEC_embedded_size): Remove. (VEC_embedded_init): Remove. (VEC_free): Remove. (VEC_copy): Remove. (VEC_space): Remove. (VEC_reserve): Remove. (VEC_reserve_exact): Remove. (VEC_splice): Remove. (VEC_safe_splice): Remove. (VEC_quick_push): Remove. (VEC_safe_push): Remove. (VEC_pop): Remove. (VEC_truncate): Remove. (VEC_safe_grow): Remove. (VEC_replace): Remove. (VEC_quick_insert): Remove. (VEC_safe_insert): Remove. (VEC_ordered_remove): Remove. (VEC_unordered_remove): Remove. (VEC_block_remove): Remove. 
(VEC_lower_bound): Remove. (VEC_alloc): Remove. (VEC_qsort): Remove. (va_heap): Declare. (va_heap::default_layout): New typedef to vl_ptr. (va_heap::reserve): New. (va_heap::release): New. (va_gc): Declare. (va_gc::default_layout): New typedef to vl_embed. (va_gc::reserve): New. (va_gc::release): New. (va_gc_atomic): Declare. Inherit from va_gc. (va_stack): Declare. (va_stack::default_layout): New typedef to vl_ptr. (va_stack::alloc): New. (va_stack::reserve): New. (va_stack::release): New. (register_stack_vec): Declare. (stack_vec_register_index): Declare. (unregister_stack_vec): Declare. (vec<T, A = va_heap, L = typename A::default_layout>): Declare empty vec template. (vec<T, A, vl_embed>): Partial specialization for embedded layout. (vec<T, A, vl_embed>::allocated): New. (vec<T, A, vl_embed>::length): New. (vec<T, A, vl_embed>::is_empty): New. (vec<T, A, vl_embed>::address): New. (vec<T, A, vl_embed>::operator[]): New. (vec<T, A, vl_embed>::last New. (vec<T, A, vl_embed>::space): New. (vec<T, A, vl_embed>::iterate): New. (vec<T, A, vl_embed>::iterate): New. (vec<T, A, vl_embed>::copy): New. (vec<T, A, vl_embed>::splice): New. (vec<T, A, vl_embed>::quick_push New. (vec<T, A, vl_embed>::pop New. (vec<T, A, vl_embed>::truncate): New. (vec<T, A, vl_embed>::quick_insert): New. (vec<T, A, vl_embed>::ordered_remove): New. (vec<T, A, vl_embed>::unordered_remove): New. (vec<T, A, vl_embed>::block_remove): New. (vec<T, A, vl_embed>::qsort): New. (vec<T, A, vl_embed>::lower_bound): New. (vec<T, A, vl_embed>::embedded_size): New. (vec<T, A, vl_embed>::embedded_init): New. (vec<T, A, vl_embed>::quick_grow): New. (vec<T, A, vl_embed>::quick_grow_cleared): New. (vec_safe_space): New. (vec_safe_length): New. (vec_safe_address): New. (vec_safe_is_empty): New. (vec_safe_reserve): New. (vec_safe_reserve_exact): New. (vec_alloc): New. (vec_free): New. (vec_safe_grow): New. (vec_safe_grow_cleared): New. (vec_safe_iterate): New. (vec_safe_push): New. (vec_safe_insert): New. 
(vec_safe_truncate): New. (vec_safe_copy): New. (vec_safe_splice): New. (vec<T, A, vl_ptr>): New partial specialization for the space efficient layout. (vec<T, A, vl_ptr>::exists): New. (vec<T, A, vl_ptr>::is_empty): New. (vec<T, A, vl_ptr>::length): New. (vec<T, A, vl_ptr>::address): New. (vec<T, A, vl_ptr>::operator[]): New. (vec<T, A, vl_ptr>::operator!=): New. (vec<T, A, vl_ptr>::operator==): New. (vec<T, A, vl_ptr>::last): New. (vec<T, A, vl_ptr>::space): New. (vec<T, A, vl_ptr>::iterate): New. (vec<T, A, vl_ptr>::copy): New. (vec<T, A, vl_ptr>::reserve): New. (vec<T, A, vl_ptr>::reserve_exact): New. (vec<T, A, vl_ptr>::splice): New. (vec<T, A, vl_ptr>::safe_splice): New. (vec<T, A, vl_ptr>::quick_push): New. (vec<T, A, vl_ptr>::safe_push): New. (vec<T, A, vl_ptr>::pop): New. (vec<T, A, vl_ptr>::truncate): New. (vec<T, A, vl_ptr>::safe_grow): New. (vec<T, A, vl_ptr>::safe_grow_cleared): New. (vec<T, A, vl_ptr>::quick_grow): New. (vec<T, A, vl_ptr>::quick_grow_cleared): New. (vec<T, A, vl_ptr>::quick_insert): New. (vec<T, A, vl_ptr>::safe_insert): New. (vec<T, A, vl_ptr>::ordered_remove): New. (vec<T, A, vl_ptr>::unordered_remove): New. (vec<T, A, vl_ptr>::block_remove): New. (vec<T, A, vl_ptr>::qsort): New. (vec<T, A, vl_ptr>::lower_bound): New. (vec_stack_alloc): Define. (FOR_EACH_VEC_SAFE_ELT): Define. * vecir.h: Remove. Update all users. * vecprim.h: Remove. Update all users. Move uchar to coretypes.h. * Makefile.in (VEC_H): Add $(GGC_H). Remove vecir.h and vecprim.h dependencies everywhere. 2012-11-16 Diego Novillo <dnovillo@google.com> * gengtype-lex.l (VEC): Remove. Add characters in the set [\!\>\.-]. * gengtype-parse.c (token_names): Remove "VEC". (require_template_declaration): Remove handling of VEC_TOKEN. (type): Likewise. Call create_user_defined_type when parsing GTY((user)). * gengtype-state.c (type_lineloc): handle TYPE_UNDEFINED. (write_state_undefined_type): New. (write_state_type): Call write_state_undefined_type for TYPE_UNDEFINED. 
(read_state_type): Call read_state_undefined_type for TYPE_UNDEFINED. * gengtype.c (dbgprint_count_type_at): Handle TYPE_UNDEFINED. (create_user_defined_type): Make extern. (type_for_name): Factor out of resolve_typedef. (create_undefined_type): New (resolve_typedef): Call it when we cannot find a previous typedef and the type is not a template. (find_structure): Accept TYPE_UNDEFINED. (set_gc_used_type): Add argument ALLOWED_UNDEFINED_TYPES, default to false. Emit an error for TYPE_UNDEFINED unless LEVEL is GC_UNUSED or ALLOWED_UNDEFINED_TYPES is set. Set ALLOWED_UNDEFINED_TYPES to true for TYPE_USER_STRUCT. (filter_type_name): Accept templates with more than one argument. (output_mangled_typename): Handle TYPE_UNDEFINED (walk_type): Likewise. (write_types_process_field): Likewise. (write_func_for_structure): If CHAIN_NEXT is set, ORIG_S should not be a user-defined type. (write_types_local_user_process_field): Handle TYPE_ARRAY, TYPE_NONE and TYPE_UNDEFINED. (write_types_local_process_field): Likewise. (contains_scalar_p): Return 0 for TYPE_USER_STRUCT. (write_root): Reject user-defined types that are not pointers. Handle TYPE_NONE, TYPE_UNDEFINED, TYPE_UNION, TYPE_LANG_STRUCT and TYPE_PARAM_STRUCT. (output_typename): Handle TYPE_NONE, TYPE_UNDEFINED, and TYPE_ARRAY. (dump_typekind): Handle TYPE_UNDEFINED. * gengtype.h (enum typekind): Add TYPE_UNDEFINED. (create_user_defined_type): Declare. (enum gty_token): Remove VEC_TOKEN. 2012-11-16 Diego Novillo <dnovillo@google.com> Adjust for new vec API (http://gcc.gnu.org/wiki/cxx-conversion/cxx-vec) * coretypes.h (uchar): Define. * alias.c: Use new vec API in vec.h. * asan.c: Likewise. * attribs.c: Likewise. * basic-block.h: Likewise. * bb-reorder.c: Likewise. * builtins.c: Likewise. * calls.c: Likewise. * cfg.c: Likewise. * cfganal.c: Likewise. * cfgcleanup.c: Likewise. * cfgexpand.c: Likewise. * cfghooks.c: Likewise. * cfghooks.h: Likewise. * cfgloop.c: Likewise. * cfgloop.h: Likewise. * cfgloopanal.c: Likewise. 
* cfgloopmanip.c: Likewise. * cfgrtl.c: Likewise. * cgraph.c: Likewise. * cgraph.h: Likewise. * cgraphclones.c: Likewise. * cgraphunit.c: Likewise. * combine.c: Likewise. * compare-elim.c: Likewise. * coverage.c: Likewise. * cprop.c: Likewise. * data-streamer.h: Likewise. * dbxout.c: Likewise. * dce.c: Likewise. * df-core.c: Likewise. * df-problems.c: Likewise. * df-scan.c: Likewise. * dominance.c: Likewise. * domwalk.c: Likewise. * domwalk.h: Likewise. * dse.c: Likewise. * dwarf2cfi.c: Likewise. * dwarf2out.c: Likewise. * dwarf2out.h: Likewise. * emit-rtl.c: Likewise. * except.c: Likewise. * except.h: Likewise. * expr.c: Likewise. * expr.h: Likewise. * final.c: Likewise. * fold-const.c: Likewise. * function.c: Likewise. * function.h: Likewise. * fwprop.c: Likewise. * gcc.c: Likewise. * gcse.c: Likewise. * genattr.c: Likewise. * genattrtab.c: Likewise. * genautomata.c: Likewise. * genextract.c: Likewise. * genopinit.c: Likewise * ggc-common.c: Likewise. * ggc.h: Likewise. * gimple-low.c: Likewise. * gimple-ssa-strength-reduction.c: Likewise. * gimple-streamer-in.c: Likewise. * gimple.c: Likewise. * gimple.h: Likewise. * gimplify.c: Likewise. * graph.c: Likewise. * graphds.c: Likewise. * graphds.h: Likewise. * graphite-blocking.c: Likewise. * graphite-clast-to-gimple.c: Likewise. * graphite-dependences.c: Likewise. * graphite-interchange.c: Likewise. * graphite-optimize-isl.c: Likewise. * graphite-poly.c: Likewise. * graphite-poly.h: Likewise. * graphite-scop-detection.c: Likewise. * graphite-scop-detection.h: Likewise. * graphite-sese-to-poly.c: Likewise. * graphite.c: Likewise. * godump.c: Likewise. * haifa-sched.c: Likewise. * hw-doloop.c: Likewise. * hw-doloop.h: Likewise. * ifcvt.c: Likewise. * insn-addr.h: Likewise. * ipa-cp.c: Likewise. * ipa-inline-analysis.c: Likewise. * ipa-inline-transform.c: Likewise. * ipa-inline.c: Likewise. * ipa-inline.h: Likewise. * ipa-prop.c: Likewise. * ipa-prop.h: Likewise. * ipa-pure-const.c: Likewise. 
* ipa-ref-inline.h: Likewise. * ipa-ref.c: Likewise. * ipa-ref.h: Likewise. * ipa-reference.c: Likewise. * ipa-split.c: Likewise. * ipa-utils.c: Likewise. * ipa-utils.h: Likewise. * ipa.c: Likewise. * ira-build.c: Likewise. * ira-color.c: Likewise. * ira-emit.c: Likewise. * ira-int.h: Likewise. * ira.c: Likewise. * loop-invariant.c: Likewise. * loop-unroll.c: Likewise. * lower-subreg.c: Likewise. * lra-lives.c: Likewise. * lra.c: Likewise. * lto-cgraph.c: Likewise. * lto-section-out.c: Likewise. * lto-streamer-in.c: Likewise. * lto-streamer-out.c: Likewise. * lto-streamer.h: Likewise. * lto-symtab.c: Likewise. * mcf.c: Likewise. * modulo-sched.c: Likewise. * omp-low.c: Likewise. * opts-common.c: Likewise. * opts-global.c: Likewise. * opts.c: Likewise. * opts.h: Likewise. * passes.c: Likewise. * predict.c: Likewise. * print-tree.c: Likewise. * profile.c: Likewise. * profile.h: Likewise. * read-rtl.c: Likewise. * ree.c: Likewise. * reg-stack.c: Likewise. * regrename.c: Likewise. * regrename.h: Likewise. * reload.c: Likewise. * reload.h: Likewise. * reload1.c: Likewise. * rtl.h: Likewise. * sched-deps.c: Likewise. * sched-int.h: Likewise. * sdbout.c: Likewise. * sel-sched-dump.c: Likewise. * sel-sched-ir.c: Likewise. * sel-sched-ir.h: Likewise. * sel-sched.c: Likewise. * sese.c: Likewise. * sese.h: Likewise. * statistics.h: Likewise. * stmt.c: Likewise. * stor-layout.c: Likewise. * store-motion.c: Likewise. * tlink.c: Likewise. * toplev.c: Likewise. * trans-mem.c: Likewise. * tree-browser.c: Likewise. * tree-call-cdce.c: Likewise. * tree-cfg.c: Likewise. * tree-cfgcleanup.c: Likewise. * tree-chrec.c: Likewise. * tree-chrec.h: Likewise. * tree-complex.c: Likewise. * tree-data-ref.c: Likewise. * tree-data-ref.h: Likewise. * tree-dfa.c: Likewise. * tree-diagnostic.c: Likewise. * tree-dump.c: Likewise. * tree-eh.c: Likewise. * tree-emutls.c: Likewise. * tree-flow.h: Likewise. * tree-if-conv.c: Likewise. * tree-inline.c: Likewise. * tree-inline.h: Likewise. 
* tree-into-ssa.c: Likewise. * tree-iterator.c: Likewise. * tree-loop-distribution.c: Likewise. * tree-mudflap.c: Likewise. * tree-optimize.c: Likewise. * tree-outof-ssa.c: Likewise. * tree-parloops.c: Likewise. * tree-phinodes.c: Likewise. * tree-predcom.c: Likewise. * tree-pretty-print.c: Likewise. * tree-scalar-evolution.c: Likewise. * tree-sra.c: Likewise. * tree-ssa-address.c: Likewise. * tree-ssa-alias.c: Likewise. * tree-ssa-ccp.c: Likewise. * tree-ssa-coalesce.c: Likewise. * tree-ssa-dce.c: Likewise. * tree-ssa-dom.c: Likewise. * tree-ssa-forwprop.c: Likewise. * tree-ssa-live.c: Likewise. * tree-ssa-live.h: Likewise. * tree-ssa-loop-im.c: Likewise. * tree-ssa-loop-ivcanon.c: Likewise. * tree-ssa-loop-ivopts.c: Likewise. * tree-ssa-loop-manip.c: Likewise. * tree-ssa-loop-niter.c: Likewise. * tree-ssa-loop-prefetch.c: Likewise. * tree-ssa-math-opts.c: Likewise. * tree-ssa-operands.c: Likewise. * tree-ssa-phiopt.c: Likewise. * tree-ssa-phiprop.c: Likewise. * tree-ssa-pre.c: Likewise. * tree-ssa-propagate.c: Likewise. * tree-ssa-reassoc.c: Likewise. * tree-ssa-sccvn.c: Likewise. * tree-ssa-sccvn.h: Likewise. * tree-ssa-strlen.c: Likewise. * tree-ssa-structalias.c: Likewise. * tree-ssa-tail-merge.c: Likewise. * tree-ssa-threadedge.c: Likewise. * tree-ssa-threadupdate.c: Likewise. * tree-ssa-uncprop.c: Likewise. * tree-ssa-uninit.c: Likewise. * tree-ssa.c: Likewise. * tree-ssanames.c: Likewise. * tree-stdarg.c: Likewise. * tree-streamer-in.c: Likewise. * tree-streamer-out.c: Likewise. * tree-streamer.c: Likewise. * tree-streamer.h: Likewise. * tree-switch-conversion.c: Likewise. * tree-vect-data-refs.c: Likewise. * tree-vect-generic.c: Likewise. * tree-vect-loop-manip.c: Likewise. * tree-vect-loop.c: Likewise. * tree-vect-patterns.c: Likewise. * tree-vect-slp.c: Likewise. * tree-vect-stmts.c: Likewise. * tree-vectorizer.c: Likewise. * tree-vectorizer.h: Likewise. * tree-vrp.c: Likewise. * tree.c: Likewise. * tree.h: Likewise. * value-prof.c: Likewise. 
* value-prof.h: Likewise. * var-tracking.c: Likewise. * varasm.c: Likewise. * varpool.c: Likewise. * vmsdbgout.c: Likewise. * config/bfin/bfin.c: Likewise. * config/c6x/c6x.c: Likewise. * config/darwin.c: Likewise. * config/i386/i386.c: Likewise. * config/ia64/ia64.c: Likewise. * config/mep/mep.c: Likewise. * config/mips/mips.c: Likewise. * config/pa/pa.c: Likewise. * config/rs6000/rs6000-c.c: Likewise. * config/rs6000/rs6000.c: Likewise. * config/rx/rx.c: Likewise. * config/spu/spu-c.c: Likewise. * config/vms/vms.c: Likewise. * config/vxworks.c: Likewise. * config/epiphany/resolve-sw-modes.c: Likewise. From-SVN: r193595
1606 lines
42 KiB
C
1606 lines
42 KiB
C
/* Loop distribution.
|
|
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
|
|
Free Software Foundation, Inc.
|
|
Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
|
|
and Sebastian Pop <sebastian.pop@amd.com>.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation; either version 3, or (at your option) any
|
|
later version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* This pass performs loop distribution: for example, the loop
|
|
|
|
|DO I = 2, N
|
|
| A(I) = B(I) + C
|
|
| D(I) = A(I-1)*E
|
|
|ENDDO
|
|
|
|
is transformed to
|
|
|
|
|DOALL I = 2, N
|
|
| A(I) = B(I) + C
|
|
|ENDDO
|
|
|
|
|
|DOALL I = 2, N
|
|
| D(I) = A(I-1)*E
|
|
|ENDDO
|
|
|
|
This pass uses an RDG, Reduced Dependence Graph built on top of the
|
|
data dependence relations. The RDG is then topologically sorted to
|
|
obtain a map of information producers/consumers based on which it
|
|
generates the new loops. */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "tree-flow.h"
|
|
#include "cfgloop.h"
|
|
#include "tree-chrec.h"
|
|
#include "tree-data-ref.h"
|
|
#include "tree-scalar-evolution.h"
|
|
#include "tree-pass.h"
|
|
|
|
/* The kind of code that will be generated for a partition: either a
   regular loop (PKIND_NORMAL) or a call to a memory builtin.  */
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };

/* A partition of the statements of a loop, to be code-generated
   independently of the other partitions.  */
typedef struct partition_s
{
  /* Bitmap of the statements belonging to this partition.  The bits are
     indexed by the statement order used in generate_loops_for_partition
     (PHIs and statements of each block in dom order).  */
  bitmap stmts;
  /* True when at least one statement in the partition writes to memory.  */
  bool has_writes;
  /* How code will be generated for this partition.  */
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;
|
|
|
|
|
|
/* Allocate and initialize a partition from BITMAP. */
|
|
|
|
static partition_t
|
|
partition_alloc (bitmap stmts)
|
|
{
|
|
partition_t partition = XCNEW (struct partition_s);
|
|
partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
|
|
partition->has_writes = false;
|
|
partition->kind = PKIND_NORMAL;
|
|
return partition;
|
|
}
|
|
|
|
/* Free PARTITION. */
|
|
|
|
static void
|
|
partition_free (partition_t partition)
|
|
{
|
|
BITMAP_FREE (partition->stmts);
|
|
free (partition);
|
|
}
|
|
|
|
/* Returns true if the partition can be generated as a builtin. */
|
|
|
|
static bool
|
|
partition_builtin_p (partition_t partition)
|
|
{
|
|
return partition->kind != PKIND_NORMAL;
|
|
}
|
|
|
|
/* Returns true if the partition has an writes. */
|
|
|
|
static bool
|
|
partition_has_writes (partition_t partition)
|
|
{
|
|
return partition->has_writes;
|
|
}
|
|
|
|
/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;
|
|
|
|
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
|
|
the LOOP. */
|
|
|
|
static bool
|
|
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
|
|
{
|
|
imm_use_iterator imm_iter;
|
|
use_operand_p use_p;
|
|
|
|
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
|
|
{
|
|
gimple use_stmt = USE_STMT (use_p);
|
|
if (!is_gimple_debug (use_stmt)
|
|
&& loop != loop_containing_stmt (use_stmt))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  /* PHI nodes define exactly one value: their result.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  /* For other statements, check every SSA definition.  */
  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
|
|
|
|
/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP, so each PHI in NEW_LOOP's header gets its entry argument
   copied from ORIG_LOOP and its latch argument remapped through the
   current-definition table.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
	 associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
	 associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
	{
	  new_ssa_name = get_current_def (def);

	  if (!new_ssa_name)
	    /* This only happens if there are no definitions inside the
	       loop.  Use the invariant in the new loop as is.  */
	    new_ssa_name = def;
	}
      else
	/* Could be an integer.  */
	new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}
|
|
|
|
/* Return a copy of LOOP placed before LOOP.  The copy is inserted on
   LOOP's preheader edge, its PHI nodes are fixed up and all variables
   in the copy are renamed.  Asserts that the duplication succeeded.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  /* The original/copy tables must be live while the loop is being
     duplicated; update_phis_for_loop_copy relies only on
     get_current_def, so the tables can be freed right after.  */
  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}
|
|
|
|
/* Creates an empty basic block after LOOP. */
|
|
|
|
static void
|
|
create_bb_after_loop (struct loop *loop)
|
|
{
|
|
edge exit = single_exit (loop);
|
|
|
|
if (!exit)
|
|
return;
|
|
|
|
split_edge (exit);
|
|
}
|
|
|
|
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  The bitmap index X must advance identically in
     both passes below for the bits to line up.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of the statements that are about to
     be removed, so no dangling debug binds survive.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (!bitmap_bit_p (partition->stmts, x++))
	    reset_debug_uses (gsi_stmt (bsi));

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    /* Labels and debug statements are not counted in the
	       bitmap indexing, mirroring stmts_from_loop.  */
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, x++))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove the statements not in PARTITION.  */
  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	if (!bitmap_bit_p (partition->stmts, x++))
	  {
	    gimple phi = gsi_stmt (bsi);
	    /* Removed virtual PHIs require renaming of the virtual
	       operand web.  */
	    if (virtual_operand_p (gimple_phi_result (phi)))
	      mark_virtual_phi_result_for_renaming (phi);
	    remove_phi_node (&bsi, true);
	  }
	else
	  gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, x++))
	    {
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&bsi, true);
	      release_defs (stmt);
	    }
	  else
	    gsi_next (&bsi);
	}
    }

  free (bbs);
}
|
|
|
|
/* Build the size argument for a memory operation call. */
|
|
|
|
static tree
|
|
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
|
|
{
|
|
tree size;
|
|
size = fold_build2_loc (loc, MULT_EXPR, sizetype,
|
|
fold_convert_loc (loc, sizetype, nb_iter),
|
|
TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
|
|
return fold_convert_loc (loc, size_type_node, size);
|
|
}
|
|
|
|
/* Build an address argument for a memory operation call.  For DR with
   a positive step this is base + offset + init; for a negative step
   the start address is adjusted so that the NB_BYTES-sized region
   covered by the builtin begins at the lowest accessed byte.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      /* Step back over the whole region, then forward by one element
	 so the region [addr, addr + nb_bytes) ends just past the
	 first (highest-addressed) element.  */
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
|
|
|
|
/* Generate a call to memset for PARTITION in LOOP.  The call is
   emitted in LOOP's preheader block; the caller is responsible for
   removing the now-redundant loop body (see
   generate_code_for_partition).  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* The store in the latch executes once less than the exit
     condition, hence the two different iteration counts.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Gimplify the size and address arguments into the preheader.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
	val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
	{
	  /* memset takes an int argument; insert a conversion
	     statement for non-constant values of other types.  */
	  gimple cstmt;
	  tree tem = make_ssa_name (integer_type_node, NULL);
	  cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
	  gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
	  val = tem;
	}
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
	fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
	fprintf (dump_file, " minus one\n");
      else
	fprintf (dump_file, "\n");
    }
}
|
|
|
|
/* Generate a call to memcpy for PARTITION in LOOP.  Falls back to
   memmove when the source and destination may alias.  The call is
   emitted in LOOP's preheader block.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  /* The store in the latch executes once less than the exit
     condition, hence the two different iteration counts.  */
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping operands; use memmove when we
     cannot prove the regions are disjoint.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
|
|
|
|
/* Remove and destroy the loop LOOP.  The loop must have a single
   exit; its body is deleted, the exit edge is redirected to come
   straight from the preheader's source block, and dominators are
   recomputed for the block after the loop.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  /* Snapshot the body before we start mutating the CFG.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop: the exit edge now comes directly from the block
     preceding the preheader, as an unconditional fallthrough.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
|
|
|
|
/* Generates code for PARTITION. */
|
|
|
|
static void
|
|
generate_code_for_partition (struct loop *loop,
|
|
partition_t partition, bool copy_p)
|
|
{
|
|
switch (partition->kind)
|
|
{
|
|
case PKIND_MEMSET:
|
|
generate_memset_builtin (loop, partition);
|
|
/* If this is the last partition for which we generate code, we have
|
|
to destroy the loop. */
|
|
if (!copy_p)
|
|
destroy_loop (loop);
|
|
break;
|
|
|
|
case PKIND_MEMCPY:
|
|
generate_memcpy_builtin (loop, partition);
|
|
/* If this is the last partition for which we generate code, we have
|
|
to destroy the loop. */
|
|
if (!copy_p)
|
|
destroy_loop (loop);
|
|
break;
|
|
|
|
case PKIND_NORMAL:
|
|
generate_loops_for_partition (loop, partition, copy_p);
|
|
break;
|
|
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
}
|
|
|
|
|
|
/* Returns true if the node V of RDG cannot be recomputed. */
|
|
|
|
static bool
|
|
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
|
|
{
|
|
if (RDG_MEM_WRITE_STMT (rdg, v))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Returns true when the vertex V has already been generated in the
|
|
current partition (V is in PROCESSED), or when V belongs to another
|
|
partition and cannot be recomputed (V is not in REMAINING_STMTS). */
|
|
|
|
static inline bool
|
|
already_processed_vertex_p (bitmap processed, int v)
|
|
{
|
|
return (bitmap_bit_p (processed, v)
|
|
|| !bitmap_bit_p (remaining_stmts, v));
|
|
}
|
|
|
|
/* Returns NULL when there is no anti-dependence among the successors
|
|
of vertex V, otherwise returns the edge with the anti-dep. */
|
|
|
|
static struct graph_edge *
|
|
has_anti_dependence (struct vertex *v)
|
|
{
|
|
struct graph_edge *e;
|
|
|
|
if (v->succ)
|
|
for (e = v->succ; e; e = e->succ_next)
|
|
if (RDGE_TYPE (e) == anti_dd)
|
|
return e;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Returns true when V has an anti-dependence edge among its successors. */
|
|
|
|
static bool
|
|
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
|
|
{
|
|
struct graph_edge *e;
|
|
|
|
if (v->pred)
|
|
for (e = v->pred; e; e = e->pred_next)
|
|
if (bitmap_bit_p (upstream_mem_writes, e->src)
|
|
/* Don't consider flow channels: a write to memory followed
|
|
by a read from memory. These channels allow the split of
|
|
the RDG in different partitions. */
|
|
&& !RDG_MEM_WRITE_STMT (rdg, e->src))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Initializes the upstream_mem_writes bitmap following the
|
|
information from RDG. */
|
|
|
|
static void
|
|
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
|
|
{
|
|
int v, x;
|
|
bitmap seen = BITMAP_ALLOC (NULL);
|
|
|
|
for (v = rdg->n_vertices - 1; v >= 0; v--)
|
|
if (!bitmap_bit_p (seen, v))
|
|
{
|
|
unsigned i;
|
|
vec<int> nodes;
|
|
nodes.create (3);
|
|
|
|
graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
|
|
|
|
FOR_EACH_VEC_ELT (nodes, i, x)
|
|
{
|
|
if (!bitmap_set_bit (seen, x))
|
|
continue;
|
|
|
|
if (RDG_MEM_WRITE_STMT (rdg, x)
|
|
|| predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
|
|
/* In anti dependences the read should occur before
|
|
the write, this is why both the read and the write
|
|
should be placed in the same partition. */
|
|
|| has_anti_dependence (&(rdg->vertices[x])))
|
|
{
|
|
bitmap_set_bit (upstream_mem_writes, x);
|
|
}
|
|
}
|
|
|
|
nodes.release ();
|
|
}
|
|
}
|
|
|
|
/* Returns true when vertex U has a memory write node as a predecessor
   in RDG, as precomputed by mark_nodes_having_upstream_mem_writes.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}
|
|
|
|
/* Forward declaration: rdg_flag_uses and rdg_flag_vertex_and_dependent
   are mutually recursive.  */
static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
					   bitmap, bitmap);
|
|
|
|
/* Flag the uses of U stopping following the information from
   upstream_mem_writes.  Pulls into PARTITION (recursively, via
   rdg_flag_vertex_and_dependent) the destination of any
   anti-dependence of U, the definition of U's virtual use, and --
   for assignments with upstream memory writes -- all users of U's
   scalar result.  LOOPS and PROCESSED are threaded through to the
   recursive calls.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
	       bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
				       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      /* Follow the virtual use (memory state) back to its defining
	 statement and keep it in the same partition.  */
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
	{
	  tree use = USE_FROM_PTR (use_p);

	  if (TREE_CODE (use) == SSA_NAME)
	    {
	      gimple def_stmt = SSA_NAME_DEF_STMT (use);
	      int v = rdg_vertex_for_stmt (rdg, def_stmt);

	      /* V can be negative when DEF_STMT is outside the RDG
		 (e.g. defined outside the loop).  */
	      if (v >= 0
		  && !already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed);
	    }
	}
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
	 between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
	{
	  use_operand_p use_p;
	  imm_use_iterator iter;

	  /* Pull every user of the scalar result into this
	     partition as well.  */
	  FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
	    {
	      int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

	      if (!already_processed_vertex_p (processed, v))
		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
					       processed);
	    }
	}
    }
}
|
|
|
|
/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  Returns early if V is already in the partition.  A
   non-recomputable vertex (memory write) marks the partition as
   having writes and is removed from the global REMAINING_STMTS
   bitmap, so no other partition will try to recompute it.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  /* bitmap_set_bit returns false if the bit was already set.  */
  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}
|
|
|
|
/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  Recurses (mutually with
   rdg_flag_uses) until the dependence closure of V is in the
   partition; PROCESSED guards against revisiting vertices.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
			       bitmap loops, bitmap processed)
{
  unsigned i;
  vec<int> nodes;
  nodes.create (3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  /* Collect the vertices V depends on, restricted to statements that
     still remain to be distributed (REMAINING_STMTS subgraph).  */
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  nodes.release ();
}
|
|
|
|
/* Initialize CONDS with all the condition statements from the basic
|
|
blocks of LOOP. */
|
|
|
|
static void
|
|
collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
|
|
{
|
|
unsigned i;
|
|
edge e;
|
|
vec<edge> exits = get_loop_exit_edges (loop);
|
|
|
|
FOR_EACH_VEC_ELT (exits, i, e)
|
|
{
|
|
gimple cond = last_stmt (e->src);
|
|
|
|
if (cond)
|
|
conds->safe_push (cond);
|
|
}
|
|
|
|
exits.release ();
|
|
}
|
|
|
|
/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  This is a fixpoint: flagging a condition's dependences may
   pull in statements from new loops, whose exit conditions must then
   be flagged too; CONDS acts as the worklist.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
		     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  vec<gimple> conds;
  conds.create (3);

  /* Seed the worklist with the exit conditions of all flagged loops.  */
  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!conds.is_empty ())
    {
      gimple cond = conds.pop ();
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
	rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      /* Any loop newly touched by COND's dependences contributes its
	 own exit conditions to the worklist (once per loop: the
	 bitmap_set_bit test ensures that).  */
      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
	if (bitmap_set_bit (loops, i))
	  collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  conds.release ();
}
|
|
|
|
/* Returns a bitmap in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  The caller owns the returned
   partition and must free it with partition_free.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  /* Also pull in the exit conditions of every loop touched above.  */
  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}
|
|
|
|
/* Free memory for COMPONENTS. */
|
|
|
|
static void
|
|
free_rdg_components (vec<rdgc> components)
|
|
{
|
|
int i;
|
|
rdgc x;
|
|
|
|
FOR_EACH_VEC_ELT (components, i, x)
|
|
{
|
|
x->vertices.release ();
|
|
free (x);
|
|
}
|
|
|
|
components.release ();
|
|
}
|
|
|
|
/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  Ownership of a saved
   component's vertex vector moves into the rdg_component; vectors of
   unsaved components are released here.  */

static void
rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
		      vec<rdgc> *components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  /* ??? Macros cannot process template types with more than one
     argument, so we need this typedef.  */
  typedef vec<int> vec_int_heap;
  vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i].create (3);

  /* Bucket every RDG vertex by the SCC it belongs to.  */
  for (i = 0; i < rdg->n_vertices; i++)
    all_components[rdg->vertices[i].component].safe_push (i);

  FOR_EACH_VEC_ELT (starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      /* Save each component only once (bitmap_set_bit returns false
	 on an already-set bit).  */
      if (bitmap_set_bit (saved_components, c))
	{
	  rdgc x = XCNEW (struct rdg_component);
	  x->num = c;
	  /* Transfer ownership of the vertex vector to X; it is
	     released later by free_rdg_components.  */
	  x->vertices = all_components[c];

	  components->safe_push (x);
	}
    }

  /* Release the vectors of components nobody saved.  */
  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      all_components[i].release ();

  free (all_components);
  BITMAP_FREE (saved_components);
}
|
|
|
|
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect only the memset zero pattern.  Sets
   partition->kind to PKIND_MEMSET / PKIND_MEMCPY when the partition's
   sole store (and optional load) match the pattern, leaving
   PKIND_NORMAL otherwise.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  /* The iteration count must be known symbolically to size the
     builtin's length argument.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      /* Volatile accesses cannot be replaced by a library call.  */
      if (gimple_has_volatile_ops (stmt))
	return;

      /* If the stmt has uses outside of the loop fail.
	 ??? If the stmt is generated in another partition that
	 is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "not generating builtin, partition has "
		     "scalar uses outside of the loop\n");
	  return;
	}
    }

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      /* memset only supports byte-replicated values: zero, all-ones,
	 a clobber-free empty CONSTRUCTOR, or a char-sized integer.  */
      if (!(integer_zerop (rhs)
	    || integer_all_onesp (rhs)
	    || real_zerop (rhs)
	    || (TREE_CODE (rhs) == CONSTRUCTOR
		&& !TREE_CLOBBER_P (rhs))
	    || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
		&& (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
		    == TYPE_MODE (unsigned_char_type_node)))))
	return;
      /* The stored value must be invariant in the loop so a single
	 memset argument can represent it.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both references must advance by the same unit stride.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vec<loop_p>();
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive non-reversed distance means the store
		 overtakes the load: memmove semantics do not cover
		 that direction.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
|
|
|
|
/* For a data reference REF, return the declaration of its base
|
|
address or NULL_TREE if the base is not determined. */
|
|
|
|
static tree
|
|
ref_base_address (data_reference_p dr)
|
|
{
|
|
tree base_address = DR_BASE_ADDRESS (dr);
|
|
if (base_address
|
|
&& TREE_CODE (base_address) == ADDR_EXPR)
|
|
return TREE_OPERAND (base_address, 0);
|
|
|
|
return base_address;
|
|
}
|
|
|
|
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG: either they share a memory-touching statement, or
   some pair of their data references resolves to the same base
   address.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		/* An undetermined base (NULL_TREE) never matches.  */
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
|
|
|
|
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  Ownership of pushed partitions
   moves to the caller; the scratch PARTITION is freed here when it
   ends up empty.  Recurses once to handle OTHER_STORES.  */

static void
rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
		      vec<int> *other_stores,
		      vec<partition_t> *partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (components, i, x)
    {
      partition_t np;
      int v = x->vertices[0];

      if (bitmap_bit_p (processed, v))
	continue;

      /* Accumulate the component's statements into the current
	 scratch partition.  */
      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      /* A partition is only emitted once it contains a write;
	 write-free components keep accumulating.  */
      if (partition_has_writes (partition))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "ldist useful partition:\n");
	      dump_bitmap (dump_file, partition->stmts);
	    }

	  partitions->safe_push (partition);
	  partition = partition_alloc (NULL);
	}
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
	&& rdg_defs_used_in_other_loops_p (rdg, i))
      other_stores->safe_push (i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (other_stores->length () > 0)
    {
      vec<rdgc> comps;
      comps.create (3);
      /* FOO is a throwaway other_stores for the recursive call.  */
      vec<int> foo;
      foo.create (3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      foo.release ();
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    partitions->safe_push (partition);
  else
    partition_free (partition);
}
|
|
|
|
/* Dump to FILE the PARTITIONS. */
|
|
|
|
static void
|
|
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
|
|
{
|
|
int i;
|
|
partition_t partition;
|
|
|
|
FOR_EACH_VEC_ELT (partitions, i, partition)
|
|
debug_bitmap_file (file, partition->stmts);
|
|
}
|
|
|
|
/* Debug PARTITIONS.  Callable from the debugger; hence the extern
   declaration and DEBUG_FUNCTION marker.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
|
|
|
|
/* Returns the number of read and write operations in the RDG. */
|
|
|
|
static int
|
|
number_of_rw_in_rdg (struct graph *rdg)
|
|
{
|
|
int i, res = 0;
|
|
|
|
for (i = 0; i < rdg->n_vertices; i++)
|
|
{
|
|
if (RDG_MEM_WRITE_STMT (rdg, i))
|
|
++res;
|
|
|
|
if (RDG_MEM_READS_STMT (rdg, i))
|
|
++res;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/* Returns the number of read and write operations in a PARTITION of
|
|
the RDG. */
|
|
|
|
static int
|
|
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
|
|
{
|
|
int res = 0;
|
|
unsigned i;
|
|
bitmap_iterator ii;
|
|
|
|
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
|
|
{
|
|
if (RDG_MEM_WRITE_STMT (rdg, i))
|
|
++res;
|
|
|
|
if (RDG_MEM_READS_STMT (rdg, i))
|
|
++res;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/* Returns true when one of the PARTITIONS contains all the read or
|
|
write operations of RDG. */
|
|
|
|
static bool
|
|
partition_contains_all_rw (struct graph *rdg,
|
|
vec<partition_t> partitions)
|
|
{
|
|
int i;
|
|
partition_t partition;
|
|
int nrw = number_of_rw_in_rdg (rdg);
|
|
|
|
FOR_EACH_VEC_ELT (partitions, i, partition)
|
|
if (nrw == number_of_rw_in_partition (rdg, partition))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops (0 when distribution was not worthwhile).
   Builds partitions, classifies them as builtins, fuses partitions
   according to the active flags, then emits code per partition.
   Initializes and tears down the file-global REMAINING_STMTS and
   UPSTREAM_MEM_WRITES bitmaps.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
	   vec<int> starting_vertices)
{
  int i, nbp;
  vec<rdgc> components;
  components.create (3);
  vec<partition_t> partitions;
  partitions.create (3);
  vec<int> other_stores;
  other_stores.create (3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
	 STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	{
	  int v;
	  unsigned j;
	  bool found = false;

	  FOR_EACH_VEC_ELT (starting_vertices, j, v)
	    if (i == v)
	      {
		found = true;
		break;
	      }

	  if (!found)
	    other_stores.safe_push (i);
	}
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
			processed);
  BITMAP_FREE (processed);

  /* Classify each partition, remembering whether any became a
     builtin (memset/memcpy) candidate.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
	{
	  nbp = 0;
	  goto ldist_done;
	}
      /* Only fuse adjacent non-builtin partitions, see PR53616.
	 ???  Use dependence information to improve partition ordering.  */
      i = 0;
      do
	{
	  /* Find the next non-builtin partition to fuse into ...  */
	  for (; partitions.iterate (i, &into); ++i)
	    if (!partition_builtin_p (into))
	      break;
	  /* ... then merge following non-builtin partitions into it,
	     stopping at the first builtin.  */
	  for (++i; partitions.iterate (i, &partition); ++i)
	    if (!partition_builtin_p (partition))
	      {
		bitmap_ior_into (into->stmts, partition->stmts);
		partitions.ordered_remove (i);
		i--;
	      }
	    else
	      break;
	}
      while ((unsigned) i < partitions.length ());
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; partitions.iterate (i, &into); ++i)
	{
	  if (partition_builtin_p (into))
	    continue;
	  for (j = i + 1;
	       partitions.iterate (j, &partition); ++j)
	    {
	      if (!partition_builtin_p (partition)
		  /* ???  The following is horribly inefficient,
		     we are re-computing and analyzing data-references
		     of the stmts in the partitions all the time.  */
		  && similar_memory_accesses (rdg, into, partition))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "fusing partitions\n");
		      dump_bitmap (dump_file, into->stmts);
		      dump_bitmap (dump_file, partition->stmts);
		      fprintf (dump_file, "because they have similar "
			       "memory accesses\n");
		    }
		  bitmap_ior_into (into->stmts, partition->stmts);
		  partitions.ordered_remove (j);
		  j--;
		}
	    }
	}
    }

  /* Distribution is pointless with zero or one non-builtin partition,
     or when a single partition already covers all memory accesses.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  /* COPY_P is true for all but the last partition.  */
  FOR_EACH_VEC_ELT (partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  other_stores.release ();
  partitions.release ();
  free_rdg_components (components);
  return nbp;
}
|
|
|
|
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution, if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  Builds the reduced
   dependence graph (RDG) first and bails out (returning 0) when that
   fails.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  vec<int> vertices;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  vec<loop_p> loop_nest;

  datarefs.create (10);
  dependence_relations.create (100);
  loop_nest.create (3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      loop_nest.release ();
      return res;
    }

  vertices.create (3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Map the seed statements to RDG vertex numbers; statements without
     a vertex (v < 0) are silently skipped.  */
  FOR_EACH_VEC_ELT (stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
	{
	  vertices.safe_push (v);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "ldist asked to generate code for vertex %d\n", v);
	}
    }

  res = ldist_gen (loop, rdg, vertices);
  vertices.release ();
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  loop_nest.release ();
  return res;
}
|
|
|
|
/* Distribute all loops in the current function.  Pass entry point:
   seeds each innermost single-exit hot loop's work list with its
   stores and calls distribute_loop on it.  Returns 0 (no extra TODO
   flags).  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  /* Reset statement UIDs; they are used by the RDG machinery.  */
  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vec<gimple>();
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  gimple_stmt_iterator gsi;
	  for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);
	      /* Only distribute stores for now.
	         ??? We should also try to distribute scalar reductions,
		 thus SSA defs that have scalar uses outside of the loop.  */
	      if (!gimple_assign_single_p (stmt)
		  || is_gimple_reg (gimple_assign_lhs (stmt)))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
      free (bbs);

      if (work_list.length () > 0)
	nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
	changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (nb_generated_loops > 1)
	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
		     num, nb_generated_loops);
	  else
	    fprintf (dump_file, "Loop %d is the same.\n", num);
	}

      work_list.release ();
    }

  if (changed)
    {
      /* The CFG was rewritten; virtual operands and loop-closed SSA
	 form must be reconstructed.  */
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
|
|
|
|
static bool
|
|
gate_tree_loop_distribution (void)
|
|
{
|
|
return flag_tree_loop_distribution
|
|
|| flag_tree_loop_distribute_patterns;
|
|
}
|
|
|
|
/* Pass descriptor for loop distribution ("ldist"), enabled by
   -ftree-loop-distribution / -ftree-loop-distribute-patterns.  */

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",			/* name */
  OPTGROUP_LOOP,                /* optinfo_flags */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,				/* sub */
  NULL,				/* next */
  0,				/* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,		/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_ggc_collect
  | TODO_verify_ssa             /* todo_flags_finish */
 }
};
|