+++ /dev/null
-This patch mostly rewrites tree-ssa-math-opts.c to insert the reciprocal
-computations *near the uses* and not near the definitions. This is more
-efficient, gives a more elegant algorithm, supports -ftrapping-math
-cases, and does not need any special casing to fix PR23948 (a 4.1
-regression) and other bugs that were already fixed in the pass (e.g.
-PR23109 and PR23234).
-
-The pass will insert multiple reciprocal computations, under these rules:
-
-1) with -fno-trapping-math at least two divides should postdominate the
-computation.
-
-2) with -ftrapping-math, in addition, the computation will be in a basic
-block that already holds a divide.
-
-3) if a computation is present in a dominator, it can be reused.
-
-This is implemented by constructing a copy of the dominator tree,
-limited to the blocks that include a divide and to their nearest
-common dominators.
-
-The tree can then be easily walked and annotated with the number of
-divides in the block or (later in the algorithm) postdominating the
-block. It is also walked to insert the computations according to the
-above rules. The final replacement of divides by multiplies does not
-need a dominator tree walk because we store the info in bb->aux.
-
-Loop-invariant motion can also do this optimization, and the new
-algorithm can merge computations that are hoisted by LIM. For this
-reason I've moved the pass after LIM.
-
-Bootstrapped/regtested i686-pc-linux-gnu, SPECint+SPECfp shows no change
-when compiled with "-O2 -ffast-math". The new testcases (together with
-the existing ones) give complete coverage of insert_bb and
-insert_reciprocals.
-
-*** gcc/gcc/Makefile.in 14 Sep 2005 09:26:41 -0000 1.1541
---- gcc/gcc/Makefile.in 24 Sep 2005 11:47:33 -0000
-***************
-*** 1908,1914 ****
- $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
- hard-reg-set.h
- tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
-! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
- tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
- $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
- function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
---- 1908,1915 ----
- $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
- hard-reg-set.h
- tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
-! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) \
-! alloc-pool.h $(BASIC_BLOCK_H)
- tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
- $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
- function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
-*** gcc/gcc/passes.c 9 Sep 2005 00:46:38 -0000 2.111
---- gcc/gcc/passes.c 24 Sep 2005 11:40:35 -0000
-***************
-*** 522,533 ****
- we add may_alias right after fold builtins
- which can create arbitrary GIMPLE. */
- NEXT_PASS (pass_may_alias);
-- NEXT_PASS (pass_cse_reciprocals);
- NEXT_PASS (pass_split_crit_edges);
- NEXT_PASS (pass_reassoc);
- NEXT_PASS (pass_pre);
- NEXT_PASS (pass_sink_code);
- NEXT_PASS (pass_tree_loop);
- NEXT_PASS (pass_dominator);
- NEXT_PASS (pass_copy_prop);
- NEXT_PASS (pass_cd_dce);
---- 501,512 ----
- we add may_alias right after fold builtins
- which can create arbitrary GIMPLE. */
- NEXT_PASS (pass_may_alias);
- NEXT_PASS (pass_split_crit_edges);
- NEXT_PASS (pass_reassoc);
- NEXT_PASS (pass_pre);
- NEXT_PASS (pass_sink_code);
- NEXT_PASS (pass_tree_loop);
-+ NEXT_PASS (pass_cse_reciprocals);
- NEXT_PASS (pass_dominator);
- NEXT_PASS (pass_copy_prop);
- NEXT_PASS (pass_cd_dce);
---- gcc/gcc/tree-ssa-math-opts.c 9 Aug 2005 03:28:32 -0000 2.5
-+++ gcc/gcc/tree-ssa-math-opts.c 25 Sep 2005 11:39:17 -0000
-@@ -47,88 +47,355 @@ Software Foundation, 51 Franklin Street,
- #include "real.h"
- #include "timevar.h"
- #include "tree-pass.h"
-+#include "alloc-pool.h"
-+#include "basic-block.h"
-
--static bool
--gate_cse_reciprocals (void)
-+struct occurrence {
-+ basic_block bb;
-+ int num_divides;
-+ tree recip_def;
-+ tree recip_def_stmt;
-+ struct occurrence *children;
-+ struct occurrence *next;
-+ bool insert_before_divide;
-+};
-+
-+static struct occurrence *occ_head;
-+static alloc_pool occ_pool;
-+
-+
-+/* Allocate and return a new struct occurrence for basic block BB, and
-+ whose children list is headed by CHILDREN. */
-+static struct occurrence *
-+occ_new (basic_block bb, struct occurrence *children)
- {
-- return optimize && !optimize_size && flag_unsafe_math_optimizations;
-+ struct occurrence *occ;
-+
-+ occ = bb->aux = pool_alloc (occ_pool);
-+ occ->bb = bb;
-+ occ->num_divides = 0;
-+ occ->recip_def = NULL;
-+ occ->recip_def_stmt = NULL;
-+ occ->children = children;
-+ occ->next = NULL;
-+ occ->insert_before_divide = false;
-+ return occ;
- }
-
--/* Where to put the statement computing a reciprocal. */
--enum place_reciprocal
-+
-+/* Insert BB into our subset of the dominator tree. P_HEAD points to a
-+ list of "struct occurrence"s, one per basic block, having IDOM as
-+ their common dominator.
-+
-+ We try to insert BB as deep as possible in the tree, and we also
-+ insert any other block that is a common dominator for BB and one
-+ block already in the tree. */
-+
-+static void
-+insert_bb (basic_block bb, struct occurrence *occ_bb, basic_block idom,
-+ struct occurrence **p_head)
- {
-- PR_BEFORE_BSI, /* Put it using bsi_insert_before. */
-- PR_AFTER_BSI, /* Put it using bsi_insert_after. */
-- PR_ON_ENTRY_EDGE /* Put it on the edge between the entry
-- and the first basic block. */
--};
-+ struct occurrence *occ, *occ_dom, **p_occ;
-
--/* Check if DEF's uses include more than one floating-point division,
-- and if so replace them by multiplications with the reciprocal. Add
-- the statement computing the reciprocal according to WHERE.
-+ for (p_occ = p_head; (occ = *p_occ) != NULL; )
-+ {
-+ basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ->bb, bb);
-+ if (dom == bb)
-+ {
-+ /* BB dominates OCC->BB. OCC becomes OCC_BB's child. */
-+ *p_occ = occ->next;
-+ occ->next = occ_bb->children;
-+ occ_bb->children = occ;
-+
-+ /* Try the next block (it may as well be dominated by BB). */
-+ }
-+
-+ else if (dom == occ->bb)
-+ {
-+ /* OCC->BB dominates BB. Tail recurse to look deeper. */
-+ insert_bb (bb, occ_bb, dom, &occ->children);
-+ return;
-+ }
-+
-+ else if (dom != idom)
-+ {
-+ gcc_assert (!dom->aux);
-+
-+ /* There is a dominator between IDOM and BB, add it and make two
-+ children out of OCC_BB and OCC. */
-+ *p_occ = occ->next;
-+ occ_dom = occ_new (dom, occ_bb);
-+ occ_bb->next = occ;
-+ occ->next = NULL;
-+
-+ /* None of the previous blocks has DOM as a dominator, so tail
-+ recurse would reexamine them uselessly. Switching BB with DOM,
-+ we go on and look for blocks dominated by DOM. */
-+ bb = dom;
-+ occ_bb = occ_dom;
-+ }
-+
-+ else
-+ {
-+ /* Nothing special, go on with the next element. */
-+ p_occ = &occ->next;
-+ }
-+ }
-+
-+ /* No deeper place was found. Make BB a child of IDOM, heading *P_HEAD. */
-+ occ_bb->next = *p_head;
-+ *p_head = occ_bb;
-+}
-+
-+/* Register that we found a divide in BB. */
-+
-+static inline void
-+found_divide (basic_block bb)
-+{
-+ struct occurrence *occ;
-+
-+ occ = (struct occurrence *) bb->aux;
-+ if (!occ)
-+ {
-+ occ = occ_new (bb, NULL);
-+ insert_bb (bb, occ, ENTRY_BLOCK_PTR, &occ_head);
-+ }
-+
-+ occ->insert_before_divide = true;
-+ occ->num_divides++;
-+}
-+
-+
-+/* Return the one of the two successors of BB that is not reached by
-+ a complex edge, if there is one. Else, return BB.
-+ This catches most cases in C++ where the result of a function call
-+ is assigned to a variable. */
-+
-+static basic_block
-+sole_noncomplex_succ (basic_block bb)
-+{
-+ edge e0, e1;
-+ if (EDGE_COUNT (bb->succs) != 2)
-+ return bb;
-+
-+ e0 = EDGE_SUCC (bb, 0);
-+ e1 = EDGE_SUCC (bb, 1);
-+ if (e0->flags & EDGE_COMPLEX)
-+ return e1->dest;
-+ if (e1->flags & EDGE_COMPLEX)
-+ return e0->dest;
-+
-+ return bb;
-+}
-+
-+
-+/* Compute the number of divides that postdominate each block in OCC and
-+ its children. */
-
-- Does not check the type of DEF, nor that DEF is a GIMPLE register.
-- This is done in the caller for speed, because otherwise this routine
-- would be called for every definition and phi node. */
- static void
--execute_cse_reciprocals_1 (block_stmt_iterator *bsi, tree def,
-- enum place_reciprocal where)
-+compute_merit (struct occurrence *occ)
- {
-- use_operand_p use_p;
-- imm_use_iterator use_iter;
-- tree t, new_stmt, type;
-- int count = 0;
-- bool ok = !flag_trapping_math;
-+ struct occurrence *occ_child;
-+ basic_block dom = occ->bb;
-
-- /* Find uses. */
-- FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
-+ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
- {
-- tree use_stmt = USE_STMT (use_p);
-- if (TREE_CODE (use_stmt) == MODIFY_EXPR
-- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
-- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
-+ basic_block bb;
-+ if (occ_child->children)
-+ compute_merit (occ_child);
-+
-+ if (flag_exceptions)
-+ bb = sole_noncomplex_succ (dom);
-+ else
-+ bb = dom;
-+
-+ if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
-+ occ->num_divides += occ_child->num_divides;
-+ }
-+}
-+
-+/* TODO: Check how this compares with bsi_after_labels. Return an iterator
-+ pointing after the last LABEL_EXPR, or before the first statement if there
-+ is no LABEL_EXPR. */
-+
-+static block_stmt_iterator
-+bsi_before_first_stmt (basic_block bb)
-+{
-+ block_stmt_iterator bsi;
-+ for (bsi = bsi_start (bb);
-+ !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
-+ bsi_next (&bsi))
-+ ;
-+
-+ return bsi;
-+}
-+
-+/* Return whether USE_STMT is a floating-point division by DEF. */
-+static inline bool
-+is_divide_by (tree use_stmt, tree def)
-+{
-+ return TREE_CODE (use_stmt) == MODIFY_EXPR
-+ && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
-+ && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
-+}
-+
-+/* Walk the subset of the dominator tree rooted at OCC, setting the
-+ RECIP_DEF field to a definition of 1.0 / DEF that can be used in
-+ the given basic block. The field may be left NULL, of course,
-+ if it is not possible or profitable to do the optimization.
-+
-+ DEF_BSI is an iterator pointing at the statement defining DEF.
-+ If RECIP_DEF is set, a dominator already has a computation that can
-+ be used. */
-+
-+static void
-+insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
-+ tree def, tree recip_def)
-+{
-+ tree type, new_stmt;
-+ block_stmt_iterator bsi;
-+ struct occurrence *occ_child;
-+
-+ if (!recip_def
-+ && (occ->insert_before_divide || !flag_trapping_math)
-+ && occ->num_divides >= 2)
-+ {
-+ /* Make a variable with the replacement and substitute it. */
-+ type = TREE_TYPE (def);
-+ recip_def = make_rename_temp (type, "reciptmp");
-+ new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
-+ fold_build2 (RDIV_EXPR, type,
-+ build_real (type, dconst1), def));
-+
-+
-+ if (occ->insert_before_divide)
- {
-- ++count;
-- /* Check if this use post-dominates the insertion point. */
-- if (ok || dominated_by_p (CDI_POST_DOMINATORS, bsi->bb,
-- bb_for_stmt (use_stmt)))
-- ok = true;
-+ /* Case 1: insert before an existing divide. */
-+ bsi = bsi_before_first_stmt (occ->bb);
-+ while (!bsi_end_p (bsi) && !is_divide_by (bsi_stmt (bsi), def))
-+ bsi_next (&bsi);
-+
-+ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
-+ }
-+ else if (def_bsi && occ->bb == def_bsi->bb)
-+ {
-+ /* Case 2: insert right after the definition. Note that this will
-+ never happen if the definition statement can throw, because in
-+ that case the sole successor of the statement's basic block will
-+ dominate all the uses as well. */
-+ bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
-+ }
-+ else
-+ {
-+ /* Case 3: insert in a basic block not containing defs/uses. */
-+ bsi = bsi_before_first_stmt (occ->bb);
-+ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
- }
-- if (count >= 2 && ok)
-- break;
-+
-+ occ->recip_def_stmt = new_stmt;
- }
-
-- if (count < 2 || !ok)
-- return;
-+ occ->recip_def = recip_def;
-+ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
-+ insert_reciprocals (def_bsi, occ_child, def, recip_def);
-+}
-+
-
-- /* Make a variable with the replacement and substitute it. */
-- type = TREE_TYPE (def);
-- t = make_rename_temp (type, "reciptmp");
-- new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
-- fold_build2 (RDIV_EXPR, type, build_real (type, dconst1),
-- def));
--
-- if (where == PR_BEFORE_BSI)
-- bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
-- else if (where == PR_AFTER_BSI)
-- bsi_insert_after (bsi, new_stmt, BSI_NEW_STMT);
-- else if (where == PR_ON_ENTRY_EDGE)
-- bsi_insert_on_edge (single_succ_edge (ENTRY_BLOCK_PTR), new_stmt);
-+/* Replace the divide at USE_P with a multiplication by the reciprocal, if
-+ possible. */
-+
-+static inline void
-+replace_reciprocal (use_operand_p use_p)
-+{
-+ tree use_stmt = USE_STMT (use_p);
-+ basic_block bb = bb_for_stmt (use_stmt);
-+ struct occurrence *occ = (struct occurrence *) bb->aux;
-+
-+ if (occ->recip_def && use_stmt != occ->recip_def_stmt)
-+ {
-+ TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
-+ SET_USE (use_p, occ->recip_def);
-+ }
-+}
-+
-+
-+/* Free OCC and return one more "struct occurrence" to be freed. */
-+
-+static struct occurrence *
-+free_bb (struct occurrence *occ)
-+{
-+ struct occurrence *child, *next;
-+
-+ /* First get the two pointers hanging off OCC. */
-+ next = occ->next;
-+ child = occ->children;
-+ occ->bb->aux = NULL;
-+ pool_free (occ_pool, occ);
-+
-+ /* Now ensure that we don't recurse unless it is necessary. */
-+ if (!child)
-+ return next;
- else
-- gcc_unreachable ();
-+ {
-+ while (next)
-+ next = free_bb (next);
-+
-+ return child;
-+ }
-+}
-
-- FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
-+static bool
-+gate_cse_reciprocals (void)
-+{
-+ return optimize && !optimize_size && flag_unsafe_math_optimizations;
-+}
-+
-+/* Look for floating-point divides among DEF's uses, and try to
-+ replace them by multiplications with the reciprocal. Add
-+ as many statements computing the reciprocal as needed.
-+
-+ Does not check the type of DEF, nor that DEF is a GIMPLE register.
-+ This is done in the caller. */
-+
-+static void
-+execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
-+{
-+ use_operand_p use_p;
-+ imm_use_iterator use_iter;
-+ struct occurrence *occ;
-+ int count = 0;
-+
-+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
- {
- tree use_stmt = USE_STMT (use_p);
-- if (use_stmt != new_stmt
-- && TREE_CODE (use_stmt) == MODIFY_EXPR
-- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
-- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
-+ if (is_divide_by (use_stmt, def))
- {
-- TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
-- SET_USE (use_p, t);
-+ found_divide (bb_for_stmt (use_stmt));
-+ count++;
- }
- }
-+
-+ /* Do the expensive part only if we can hope to optimize something. */
-+ if (count >= 2)
-+ {
-+ for (occ = occ_head; occ; occ = occ->next)
-+ {
-+ compute_merit (occ);
-+ insert_reciprocals (def_bsi, occ, def, NULL);
-+ }
-+
-+ FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
-+ {
-+ tree use_stmt = USE_STMT (use_p);
-+ if (is_divide_by (use_stmt, def))
-+ replace_reciprocal (use_p);
-+ }
-+ }
-+
-+ for (occ = occ_head; occ; )
-+ occ = free_bb (occ);
-+
-+ occ_head = NULL;
- }
-
- static void
-@@ -137,34 +404,30 @@ execute_cse_reciprocals (void)
- basic_block bb;
- tree arg;
-
-- if (flag_trapping_math)
-- calculate_dominance_info (CDI_POST_DOMINATORS);
-+ occ_pool = create_alloc_pool ("dominators for recip",
-+ sizeof (struct occurrence),
-+ n_basic_blocks / 3 + 1);
-
-- if (single_succ_p (ENTRY_BLOCK_PTR))
-- for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
-- if (default_def (arg))
-- {
-- block_stmt_iterator bsi;
-- bsi = bsi_start (single_succ (ENTRY_BLOCK_PTR));
-- execute_cse_reciprocals_1 (&bsi, default_def (arg),
-- PR_ON_ENTRY_EDGE);
-- }
-+ calculate_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
-+
-+ FOR_EACH_BB (bb)
-+ bb->aux = NULL;
-+
-+ for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
-+ if (default_def (arg))
-+ execute_cse_reciprocals_1 (NULL, default_def (arg));
-
- FOR_EACH_BB (bb)
- {
-- block_stmt_iterator bsi;
-+ block_stmt_iterator bsi = bsi_before_first_stmt (bb);
- tree phi, def;
-- for (bsi = bsi_start (bb);
-- !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
-- bsi_next (&bsi))
-- ;
-
- for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- {
- def = PHI_RESULT (phi);
- if (FLOAT_TYPE_P (TREE_TYPE (def))
- && is_gimple_reg (def))
-- execute_cse_reciprocals_1 (&bsi, def, PR_BEFORE_BSI);
-+ execute_cse_reciprocals_1 (NULL, def);
- }
-
- for (; !bsi_end_p (bsi); bsi_next (&bsi))
-@@ -174,15 +437,12 @@ execute_cse_reciprocals (void)
- && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
- && FLOAT_TYPE_P (TREE_TYPE (def))
- && TREE_CODE (def) == SSA_NAME)
-- execute_cse_reciprocals_1 (&bsi, def, PR_AFTER_BSI);
-+ execute_cse_reciprocals_1 (&bsi, def);
- }
- }
-
-- if (flag_trapping_math)
-- free_dominance_info (CDI_POST_DOMINATORS);
--
-- if (single_succ_p (ENTRY_BLOCK_PTR))
-- bsi_commit_one_edge_insert (single_succ_edge (ENTRY_BLOCK_PTR), NULL);
-+ free_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
-+ free_alloc_pool (occ_pool);
- }
-
- struct tree_opt_pass pass_cse_reciprocals =