]> git.pld-linux.org Git - packages/gcc.git/blame - gcc-pr23948.patch
- updated to 4.1.0-20051121r107281, java-awt/qt4 bind disabled now,
[packages/gcc.git] / gcc-pr23948.patch
CommitLineData
ca826263
PS
1This patch mostly rewrites tree-ssa-math-opts.c to insert the reciprocal
2computations *near the uses* and not near the definitions. This is more
3efficient, gives a more elegant algorithm, supports -ftrapping-math
4cases, and does not need any special casing to fix PR23948 (a 4.1
5regression) and other bugs that were already fixed in the pass (e.g.
6PR23109 and PR23234).
7
8The pass will insert multiple reciprocal computations, under these rules:
9
101) with -fno-trapping-math at least two divides should postdominate the
11computation.
12
132) with -ftrapping-math, in addition, the computation will be in a basic
14block that already holds a divide.
15
163) if a computation is present in a dominator, it can be reused.
17
18The way that this was implemented was to construct a copy of the
19dominator tree, limited to blocks that include a divide, and their
20nearest common dominators.
21
22The tree that can be easily walked and annotated with the number of
23divides in the block or (later in the algorithm) postdominating the
24block. It is also walked to insert the computations according to the
25above rules. The final replacement of divides by multiplies does not
26need a dominator tree walk because we store the info in bb->aux.
27
28Loop-invariant motion can also do this optimization, and the new
29algorithm can merge computations that are hoisted by LIM. For this
30reason I've moved the pass after LIM.
31
32Bootstrapped/regtested i686-pc-linux-gnu, SPECint+SPECfp shows no change
33when compiled with "-O2 -ffast-math". The new testcases (together with
34the existing ones) give complete coverage of insert_bb and
35insert_reciprocals.
36
37*** gcc/gcc/Makefile.in 14 Sep 2005 09:26:41 -0000 1.1541
38--- gcc/gcc/Makefile.in 24 Sep 2005 11:47:33 -0000
39***************
40*** 1908,1914 ****
41 $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
42 hard-reg-set.h
43 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
44! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
45 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
46 $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
47 function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
48--- 1908,1915 ----
49 $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
50 hard-reg-set.h
51 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
52! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) \
53! alloc-pool.h $(BASIC_BLOCK_H)
54 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
55 $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
56 function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
57*** gcc/gcc/passes.c 9 Sep 2005 00:46:38 -0000 2.111
58--- gcc/gcc/passes.c 24 Sep 2005 11:40:35 -0000
59***************
60*** 522,533 ****
61 we add may_alias right after fold builtins
62 which can create arbitrary GIMPLE. */
63 NEXT_PASS (pass_may_alias);
64- NEXT_PASS (pass_cse_reciprocals);
65 NEXT_PASS (pass_split_crit_edges);
66 NEXT_PASS (pass_reassoc);
67 NEXT_PASS (pass_pre);
68 NEXT_PASS (pass_sink_code);
69 NEXT_PASS (pass_tree_loop);
70 NEXT_PASS (pass_dominator);
71 NEXT_PASS (pass_copy_prop);
72 NEXT_PASS (pass_cd_dce);
73--- 501,512 ----
74 we add may_alias right after fold builtins
75 which can create arbitrary GIMPLE. */
76 NEXT_PASS (pass_may_alias);
77 NEXT_PASS (pass_split_crit_edges);
78 NEXT_PASS (pass_reassoc);
79 NEXT_PASS (pass_pre);
80 NEXT_PASS (pass_sink_code);
81 NEXT_PASS (pass_tree_loop);
82+ NEXT_PASS (pass_cse_reciprocals);
83 NEXT_PASS (pass_dominator);
84 NEXT_PASS (pass_copy_prop);
85 NEXT_PASS (pass_cd_dce);
86--- gcc/gcc/tree-ssa-math-opts.c 9 Aug 2005 03:28:32 -0000 2.5
87+++ gcc/gcc/tree-ssa-math-opts.c 25 Sep 2005 11:39:17 -0000
88@@ -47,88 +47,355 @@ Software Foundation, 51 Franklin Street,
89 #include "real.h"
90 #include "timevar.h"
91 #include "tree-pass.h"
92+#include "alloc-pool.h"
93+#include "basic-block.h"
94
95-static bool
96-gate_cse_reciprocals (void)
97+struct occurrence {
98+ basic_block bb;
99+ int num_divides;
100+ tree recip_def;
101+ tree recip_def_stmt;
102+ struct occurrence *children;
103+ struct occurrence *next;
104+ bool insert_before_divide;
105+};
106+
107+static struct occurrence *occ_head;
108+static alloc_pool occ_pool;
109+
110+
111+/* Allocate and return a new struct occurrence for basic block BB, and
112+ whose children list is headed by CHILDREN. */
113+static struct occurrence *
114+occ_new (basic_block bb, struct occurrence *children)
115 {
116- return optimize && !optimize_size && flag_unsafe_math_optimizations;
117+ struct occurrence *occ;
118+
119+ occ = bb->aux = pool_alloc (occ_pool);
120+ occ->bb = bb;
121+ occ->num_divides = 0;
122+ occ->recip_def = NULL;
123+ occ->recip_def_stmt = NULL;
124+ occ->children = children;
125+ occ->next = NULL;
126+ occ->insert_before_divide = false;
127+ return occ;
128 }
129
130-/* Where to put the statement computing a reciprocal. */
131-enum place_reciprocal
132+
133+/* Insert BB into our subset of the dominator tree. PHEAD points to a
134+ list of "struct occurrence"s, one per basic block, having IDOM as
135+ their common dominator.
136+
137+ We try to insert BB as deep as possible in the tree, and we also
138+ insert any other block that is a common dominator for BB and one
139+ block already in the tree. */
140+
141+static void
142+insert_bb (basic_block bb, struct occurrence *occ_bb, basic_block idom,
143+ struct occurrence **p_head)
144 {
145- PR_BEFORE_BSI, /* Put it using bsi_insert_before. */
146- PR_AFTER_BSI, /* Put it using bsi_insert_after. */
147- PR_ON_ENTRY_EDGE /* Put it on the edge between the entry
148- and the first basic block. */
149-};
150+ struct occurrence *occ, *occ_dom, **p_occ;
151
152-/* Check if DEF's uses include more than one floating-point division,
153- and if so replace them by multiplications with the reciprocal. Add
154- the statement computing the reciprocal according to WHERE.
155+ for (p_occ = p_head; (occ = *p_occ) != NULL; )
156+ {
157+ basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ->bb, bb);
158+ if (dom == bb)
159+ {
160+ /* BB dominates OCC->BB. OCC becomes OCC_BB's child. */
161+ *p_occ = occ->next;
162+ occ->next = occ_bb->children;
163+ occ_bb->children = occ;
164+
165+ /* Try the next block (it may as well be dominated by BB). */
166+ }
167+
168+ else if (dom == occ->bb)
169+ {
170+ /* OCC->BB dominates BB. Tail recurse to look deeper. */
171+ insert_bb (bb, occ_bb, dom, &occ->children);
172+ return;
173+ }
174+
175+ else if (dom != idom)
176+ {
177+ gcc_assert (!dom->aux);
178+
179+ /* There is a dominator between IDOM and BB, add it and make two
180+ children out of OCC_BB and OCC. */
181+ *p_occ = occ->next;
182+ occ_dom = occ_new (dom, occ_bb);
183+ occ_bb->next = occ;
184+ occ->next = NULL;
185+
186+ /* None of the previous blocks has DOM as a dominator, so tail
187+ recurse would reexamine them uselessly. Switching BB with DOM,
188+ we go on and look for blocks dominated by DOM. */
189+ bb = dom;
190+ occ_bb = occ_dom;
191+ }
192+
193+ else
194+ {
195+ /* Nothing special, go on with the next element. */
196+ p_occ = &occ->next;
197+ }
198+ }
199+
200+ /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
201+ occ_bb->next = *p_head;
202+ *p_head = occ_bb;
203+}
204+
205+/* Register that we found a divide in BB. */
206+
207+static inline void
208+found_divide (basic_block bb)
209+{
210+ struct occurrence *occ;
211+
212+ occ = (struct occurrence *) bb->aux;
213+ if (!occ)
214+ {
215+ occ = occ_new (bb, NULL);
216+ insert_bb (bb, occ, ENTRY_BLOCK_PTR, &occ_head);
217+ }
218+
219+ occ->insert_before_divide = true;
220+ occ->num_divides++;
221+}
222+
223+
224+/* Return the one of two successor of BB that is not reachable by a
225+ reached by a complex edge, if there is one. Else, return BB.
226+ This catches most cases in C++ where the result of a function call
227+ is assigned to a variable. */
228+
229+static basic_block
230+sole_noncomplex_succ (basic_block bb)
231+{
232+ edge e0, e1;
233+ if (EDGE_COUNT (bb->succs) != 2)
234+ return bb;
235+
236+ e0 = EDGE_SUCC (bb, 0);
237+ e1 = EDGE_SUCC (bb, 1);
238+ if (e0->flags & EDGE_COMPLEX)
239+ return e1->dest;
240+ if (e1->flags & EDGE_COMPLEX)
241+ return e0->dest;
242+
243+ return bb;
244+}
245+
246+
247+/* Compute the number of divides that postdominate each block in OCC and
248+ its children. */
249
250- Does not check the type of DEF, nor that DEF is a GIMPLE register.
251- This is done in the caller for speed, because otherwise this routine
252- would be called for every definition and phi node. */
253 static void
254-execute_cse_reciprocals_1 (block_stmt_iterator *bsi, tree def,
255- enum place_reciprocal where)
256+compute_merit (struct occurrence *occ)
257 {
258- use_operand_p use_p;
259- imm_use_iterator use_iter;
260- tree t, new_stmt, type;
261- int count = 0;
262- bool ok = !flag_trapping_math;
263+ struct occurrence *occ_child;
264+ basic_block dom = occ->bb;
265
266- /* Find uses. */
267- FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
268+ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
269 {
270- tree use_stmt = USE_STMT (use_p);
271- if (TREE_CODE (use_stmt) == MODIFY_EXPR
272- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
273- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
274+ basic_block bb;
275+ if (occ_child->children)
276+ compute_merit (occ_child);
277+
278+ if (flag_exceptions)
279+ bb = sole_noncomplex_succ (dom);
280+ else
281+ bb = dom;
282+
283+ if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
284+ occ->num_divides += occ_child->num_divides;
285+ }
286+}
287+
288+/* TODO: Check how this compares with bsi_after_labels. Return an iterator
289+ pointing after the last LABEL_EXPR, or before the first statement if there
290+ is no LABEL_EXPR. */
291+
292+static block_stmt_iterator
293+bsi_before_first_stmt (basic_block bb)
294+{
295+ block_stmt_iterator bsi;
296+ for (bsi = bsi_start (bb);
297+ !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
298+ bsi_next (&bsi))
299+ ;
300+
301+ return bsi;
302+}
303+
304+/* Return whether USE_STMT is a floating-point division by DEF. */
305+static inline bool
306+is_divide_by (tree use_stmt, tree def)
307+{
308+ return TREE_CODE (use_stmt) == MODIFY_EXPR
309+ && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
310+ && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
311+}
312+
313+/* Walk the subset of the dominator tree rooted at OCC, setting the
314+ RECIP_DEF field to a definition of 1.0 / DEF that can be used in
315+ the given basic block. The field may be left NULL, of course,
316+ if it is not possible or profitable to do the optimization.
317+
318+ DEF_BSI is an iterator pointing at the statement defining DEF.
319+ If RECIP_DEF is set, a dominator already has a computation that can
320+ be used. */
321+
322+static void
323+insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
324+ tree def, tree recip_def)
325+{
326+ tree type, new_stmt;
327+ block_stmt_iterator bsi;
328+ struct occurrence *occ_child;
329+
330+ if (!recip_def
331+ && (occ->insert_before_divide || !flag_trapping_math)
332+ && occ->num_divides >= 2)
333+ {
334+ /* Make a variable with the replacement and substitute it. */
335+ type = TREE_TYPE (def);
336+ recip_def = make_rename_temp (type, "reciptmp");
337+ new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
338+ fold_build2 (RDIV_EXPR, type,
339+ build_real (type, dconst1), def));
340+
341+
342+ if (occ->insert_before_divide)
343 {
344- ++count;
345- /* Check if this use post-dominates the insertion point. */
346- if (ok || dominated_by_p (CDI_POST_DOMINATORS, bsi->bb,
347- bb_for_stmt (use_stmt)))
348- ok = true;
349+ /* Case 1: insert before an existing divide. */
350+ bsi = bsi_before_first_stmt (occ->bb);
351+ while (!bsi_end_p (bsi) && !is_divide_by (bsi_stmt (bsi), def))
352+ bsi_next (&bsi);
353+
354+ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
355+ }
356+ else if (def_bsi && occ->bb == def_bsi->bb)
357+ {
358+ /* Case 2: insert right after the definition. Note that this will
359+ never happen if the definition statement can throw, because in
360+ that case the sole successor of the statement's basic block will
361+ dominate all the uses as well. */
362+ bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
363+ }
364+ else
365+ {
366+ /* Case 3: insert in a basic block not containing defs/uses. */
367+ bsi = bsi_before_first_stmt (occ->bb);
368+ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
369 }
370- if (count >= 2 && ok)
371- break;
372+
373+ occ->recip_def_stmt = new_stmt;
374 }
375
376- if (count < 2 || !ok)
377- return;
378+ occ->recip_def = recip_def;
379+ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
380+ insert_reciprocals (def_bsi, occ_child, def, recip_def);
381+}
382+
383
384- /* Make a variable with the replacement and substitute it. */
385- type = TREE_TYPE (def);
386- t = make_rename_temp (type, "reciptmp");
387- new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
388- fold_build2 (RDIV_EXPR, type, build_real (type, dconst1),
389- def));
390-
391- if (where == PR_BEFORE_BSI)
392- bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
393- else if (where == PR_AFTER_BSI)
394- bsi_insert_after (bsi, new_stmt, BSI_NEW_STMT);
395- else if (where == PR_ON_ENTRY_EDGE)
396- bsi_insert_on_edge (single_succ_edge (ENTRY_BLOCK_PTR), new_stmt);
397+/* Replace the divide at USE_P with a multiplication by the reciprocal, if
398+ possible. */
399+
400+static inline void
401+replace_reciprocal (use_operand_p use_p)
402+{
403+ tree use_stmt = USE_STMT (use_p);
404+ basic_block bb = bb_for_stmt (use_stmt);
405+ struct occurrence *occ = (struct occurrence *) bb->aux;
406+
407+ if (occ->recip_def && use_stmt != occ->recip_def_stmt)
408+ {
409+ TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
410+ SET_USE (use_p, occ->recip_def);
411+ }
412+}
413+
414+
415+/* Free OCC and return one more "struct occurrence" to be freed. */
416+
417+static struct occurrence *
418+free_bb (struct occurrence *occ)
419+{
420+ struct occurrence *child, *next;
421+
422+ /* First get the two pointers hanging off OCC. */
423+ next = occ->next;
424+ child = occ->children;
425+ occ->bb->aux = NULL;
426+ pool_free (occ_pool, occ);
427+
428+ /* Now ensure that we don't recurse unless it is necessary. */
429+ if (!child)
430+ return next;
431 else
432- gcc_unreachable ();
433+ {
434+ while (next)
435+ next = free_bb (next);
436+
437+ return child;
438+ }
439+}
440
441- FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
442+static bool
443+gate_cse_reciprocals (void)
444+{
445+ return optimize && !optimize_size && flag_unsafe_math_optimizations;
446+}
447+
448+/* Look for floating-point divides among DEF's uses, and try to
449+ replace them by multiplications with the reciprocal. Add
450+ as many statements computing the reciprocal as needed.
451+
452+ Does not check the type of DEF, nor that DEF is a GIMPLE register.
453+ This is done in the caller. */
454+
455+static void
456+execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
457+{
458+ use_operand_p use_p;
459+ imm_use_iterator use_iter;
460+ struct occurrence *occ;
461+ int count = 0;
462+
463+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
464 {
465 tree use_stmt = USE_STMT (use_p);
466- if (use_stmt != new_stmt
467- && TREE_CODE (use_stmt) == MODIFY_EXPR
468- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
469- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
470+ if (is_divide_by (use_stmt, def))
471 {
472- TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
473- SET_USE (use_p, t);
474+ found_divide (bb_for_stmt (use_stmt));
475+ count++;
476 }
477 }
478+
479+ /* Do the expensive part only if we can hope to optimize something. */
480+ if (count >= 2)
481+ {
482+ for (occ = occ_head; occ; occ = occ->next)
483+ {
484+ compute_merit (occ);
485+ insert_reciprocals (def_bsi, occ, def, NULL);
486+ }
487+
488+ FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
489+ {
490+ tree use_stmt = USE_STMT (use_p);
491+ if (is_divide_by (use_stmt, def))
492+ replace_reciprocal (use_p);
493+ }
494+ }
495+
496+ for (occ = occ_head; occ; )
497+ occ = free_bb (occ);
498+
499+ occ_head = NULL;
500 }
501
502 static void
503@@ -137,34 +404,30 @@ execute_cse_reciprocals (void)
504 basic_block bb;
505 tree arg;
506
507- if (flag_trapping_math)
508- calculate_dominance_info (CDI_POST_DOMINATORS);
509+ occ_pool = create_alloc_pool ("dominators for recip",
510+ sizeof (struct occurrence),
511+ n_basic_blocks / 3 + 1);
512
513- if (single_succ_p (ENTRY_BLOCK_PTR))
514- for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
515- if (default_def (arg))
516- {
517- block_stmt_iterator bsi;
518- bsi = bsi_start (single_succ (ENTRY_BLOCK_PTR));
519- execute_cse_reciprocals_1 (&bsi, default_def (arg),
520- PR_ON_ENTRY_EDGE);
521- }
522+ calculate_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
523+
524+ FOR_EACH_BB (bb)
525+ bb->aux = NULL;
526+
527+ for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
528+ if (default_def (arg))
529+ execute_cse_reciprocals_1 (NULL, default_def (arg));
530
531 FOR_EACH_BB (bb)
532 {
533- block_stmt_iterator bsi;
534+ block_stmt_iterator bsi = bsi_before_first_stmt (bb);
535 tree phi, def;
536- for (bsi = bsi_start (bb);
537- !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
538- bsi_next (&bsi))
539- ;
540
541 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
542 {
543 def = PHI_RESULT (phi);
544 if (FLOAT_TYPE_P (TREE_TYPE (def))
545 && is_gimple_reg (def))
546- execute_cse_reciprocals_1 (&bsi, def, PR_BEFORE_BSI);
547+ execute_cse_reciprocals_1 (NULL, def);
548 }
549
550 for (; !bsi_end_p (bsi); bsi_next (&bsi))
551@@ -174,15 +437,12 @@ execute_cse_reciprocals (void)
552 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
553 && FLOAT_TYPE_P (TREE_TYPE (def))
554 && TREE_CODE (def) == SSA_NAME)
555- execute_cse_reciprocals_1 (&bsi, def, PR_AFTER_BSI);
556+ execute_cse_reciprocals_1 (&bsi, def);
557 }
558 }
559
560- if (flag_trapping_math)
561- free_dominance_info (CDI_POST_DOMINATORS);
562-
563- if (single_succ_p (ENTRY_BLOCK_PTR))
564- bsi_commit_one_edge_insert (single_succ_edge (ENTRY_BLOCK_PTR), NULL);
565+ free_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
566+ free_alloc_pool (occ_pool);
567 }
568
569 struct tree_opt_pass pass_cse_reciprocals =
This page took 0.251503 seconds and 4 git commands to generate.