]>
Commit | Line | Data |
---|---|---|
58cfc4f2 JB |
1 | /* |
2 | * Copyright (c) 1992, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This software was developed by the Computer Systems Engineering group | |
6 | * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and | |
7 | * contributed to Berkeley. | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | |
17 | * 3. All advertising materials mentioning features or use of this software | |
18 | * must display the following acknowledgement: | |
19 | * This product includes software developed by the University of | |
20 | * California, Berkeley and its contributors. | |
21 | * 4. Neither the name of the University nor the names of its contributors | |
22 | * may be used to endorse or promote products derived from this software | |
23 | * without specific prior written permission. | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
35 | * SUCH DAMAGE. | |
36 | * | |
37 | * from: Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp | |
38 | * $NetBSD: divrem.m4,v 1.4 1997/10/09 10:07:54 lukem Exp $ | |
39 | */ | |
40 | ||
41 | /* | |
42 | * Division and remainder, from Appendix E of the Sparc Version 8 | |
43 | * Architecture Manual, with fixes from Gordon Irlam. | |
44 | */ | |
45 | ||
46 | #if defined(LIBC_SCCS) && !defined(lint) | |
47 | .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93" | |
48 | #endif /* LIBC_SCCS and not lint */ | |
49 | ||
50 | /* | |
51 | * Input: dividend and divisor in %o0 and %o1 respectively. | |
52 | * | |
53 | * m4 parameters: | |
54 | * NAME name of function to generate | |
55 | * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 | |
56 | * S S=true => signed; S=false => unsigned | |
57 | * | |
58 | * Algorithm parameters: | |
59 | * N how many bits per iteration we try to get (4) | |
60 | * WORDSIZE total number of bits (32) | |
61 | * | |
62 | * Derived constants: | |
63 | * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken) | |
64 | * TOPBITS number of bits in the top `decade' of a number | |
65 | * | |
66 | * Important variables: | |
67 | * Q the partial quotient under development (initially 0) | |
68 | * R the remainder so far, initially the dividend | |
69 | * ITER number of main division loop iterations required; | |
70 | * equal to ceil(log2(quotient) / N). Note that this | |
71 | * is the log base (2^N) of the quotient. | |
72 | * V the current comparand, initially divisor*2^(ITER*N-1) | |
73 | * | |
74 | * Cost: | |
75 | * Current estimate for non-large dividend is | |
76 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | |
77 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | |
78 | * different path, as the upper bits of the quotient must be developed | |
79 | * one bit at a time. | |
80 | */ | |
81 | ||
82 | define(N, `4') | |
83 | define(TWOSUPN, `16') | |
84 | define(WORDSIZE, `32') | |
85 | define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N))) | |
86 | ||
87 | define(dividend, `%o0') | |
88 | define(divisor, `%o1') | |
89 | define(Q, `%o2') | |
90 | define(R, `%o3') | |
91 | define(ITER, `%o4') | |
92 | define(V, `%o5') | |
93 | ||
94 | /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */ | |
95 | define(T, `%g1') | |
96 | define(SC, `%g7') | |
97 | ifelse(S, `true', `define(SIGN, `%g6')') | |
98 | ||
99 | /* | |
100 | * This is the recursive definition for developing quotient digits. | |
101 | * | |
102 | * Parameters: | |
103 | * $1 the current depth, 1 <= $1 <= N | |
104 | * $2 the current accumulation of quotient bits | |
105 | * N max depth | |
106 | * | |
107 | * We add a new bit to $2 and either recurse or insert the bits in | |
108 | * the quotient. R, Q, and V are inputs and outputs as defined above; | |
109 | * the condition codes are expected to reflect the input R, and are | |
110 | * modified to reflect the output R. | |
111 | */ | |
112 | define(DEVELOP_QUOTIENT_BITS, | |
113 | ` ! depth $1, accumulated bits $2 | |
114 | bl L.$1.eval(TWOSUPN+$2) | |
115 | srl V,1,V | |
116 | ! remainder is positive | |
117 | subcc R,V,R | |
118 | ifelse($1, N, | |
119 | ` b 9f | |
120 | add Q, ($2*2+1), Q | |
121 | ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') | |
122 | L.$1.eval(TWOSUPN+$2): | |
123 | ! remainder is negative | |
124 | addcc R,V,R | |
125 | ifelse($1, N, | |
126 | ` b 9f | |
127 | add Q, ($2*2-1), Q | |
128 | ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') | |
129 | ifelse($1, 1, `9:')') | |
130 | ||
131 | #define C_LABEL(name) name: | |
132 | ||
133 | #define C_SYMBOL_NAME(name) name | |
134 | ||
135 | #define ENTRY(name) \ | |
136 | .global C_SYMBOL_NAME(name); \ | |
137 | .align 4;\ | |
138 | C_LABEL(name);\ | |
139 | .type name,@function; | |
140 | ||
141 | #define END(name) \ | |
142 | .size name, . - name | |
143 | ||
144 | #define ST_DIV0 0x02 | |
145 | ||
146 | ||
147 | ENTRY(NAME) | |
148 | ifelse(S, `true', | |
149 | ` ! compute sign of result; if neither is negative, no problem | |
150 | orcc divisor, dividend, %g0 ! either negative? | |
151 | bge 2f ! no, go do the divide | |
152 | ifelse(OP, `div', | |
153 | `xor divisor, dividend, SIGN', | |
154 | `mov dividend, SIGN') ! compute sign in any case | |
155 | tst divisor | |
156 | bge 1f | |
157 | tst dividend | |
158 | ! divisor is definitely negative; dividend might also be negative | |
159 | bge 2f ! if dividend not negative... | |
160 | neg divisor ! in any case, make divisor nonneg | |
161 | 1: ! dividend is negative, divisor is nonnegative | |
162 | neg dividend ! make dividend nonnegative | |
163 | 2: | |
164 | ') | |
165 | ! Ready to divide. Compute size of quotient; scale comparand. | |
166 | orcc divisor, %g0, V | |
167 | bnz 1f | |
168 | mov dividend, R | |
169 | ||
170 | ! Divide by zero trap. If it returns, return 0 (about as | |
171 | ! wrong as possible, but that is what SunOS does...). | |
172 | t ST_DIV0 | |
173 | retl | |
174 | clr %o0 | |
175 | ||
176 | 1: | |
177 | cmp R, V ! if divisor exceeds dividend, done | |
178 | blu Lgot_result ! (and algorithm fails otherwise) | |
179 | clr Q | |
180 | sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T | |
181 | cmp R, T | |
182 | blu Lnot_really_big | |
183 | clr ITER | |
184 | ||
185 | ! `Here the dividend is >= 2^(31-N) or so. We must be careful here, | |
186 | ! as our usual N-at-a-shot divide step will cause overflow and havoc. | |
187 | ! The number of bits in the result here is N*ITER+SC, where SC <= N. | |
188 | ! Compute ITER in an unorthodox manner: know we need to shift V into | |
189 | ! the top decade: so do not even bother to compare to R.' | |
190 | 1: | |
191 | cmp V, T | |
192 | bgeu 3f | |
193 | mov 1, SC | |
194 | sll V, N, V | |
195 | b 1b | |
196 | inc ITER | |
197 | ||
198 | ! Now compute SC. | |
199 | 2: addcc V, V, V | |
200 | bcc Lnot_too_big | |
201 | inc SC | |
202 | ||
203 | ! We get here if the divisor overflowed while shifting. | |
204 | ! This means that R has the high-order bit set. | |
205 | ! Restore V and subtract from R. | |
206 | sll T, TOPBITS, T ! high order bit | |
207 | srl V, 1, V ! rest of V | |
208 | add V, T, V | |
209 | b Ldo_single_div | |
210 | dec SC | |
211 | ||
212 | Lnot_too_big: | |
213 | 3: cmp V, R | |
214 | blu 2b | |
215 | nop | |
216 | be Ldo_single_div | |
217 | nop | |
218 | /* NB: these are commented out in the V8-Sparc manual as well */ | |
219 | /* (I do not understand this) */ | |
220 | ! V > R: went too far: back up 1 step | |
221 | ! srl V, 1, V | |
222 | ! dec SC | |
223 | ! do single-bit divide steps | |
224 | ! | |
225 | ! We have to be careful here. We know that R >= V, so we can do the | |
226 | ! first divide step without thinking. BUT, the others are conditional, | |
227 | ! and are only done if R >= 0. Because both R and V may have the high- | |
228 | ! order bit set in the first step, just falling into the regular | |
229 | ! division loop will mess up the first time around. | |
230 | ! So we unroll slightly... | |
231 | Ldo_single_div: | |
232 | deccc SC | |
233 | bl Lend_regular_divide | |
234 | nop | |
235 | sub R, V, R | |
236 | mov 1, Q | |
237 | b Lend_single_divloop | |
238 | nop | |
239 | Lsingle_divloop: | |
240 | sll Q, 1, Q | |
241 | bl 1f | |
242 | srl V, 1, V | |
243 | ! R >= 0 | |
244 | sub R, V, R | |
245 | b 2f | |
246 | inc Q | |
247 | 1: ! R < 0 | |
248 | add R, V, R | |
249 | dec Q | |
250 | 2: | |
251 | Lend_single_divloop: | |
252 | deccc SC | |
253 | bge Lsingle_divloop | |
254 | tst R | |
255 | b,a Lend_regular_divide | |
256 | ||
257 | Lnot_really_big: | |
258 | 1: | |
259 | sll V, N, V | |
260 | cmp V, R | |
261 | bleu 1b | |
262 | inccc ITER | |
263 | be Lgot_result | |
264 | dec ITER | |
265 | ||
266 | tst R ! set up for initial iteration | |
267 | Ldivloop: | |
268 | sll Q, N, Q | |
269 | DEVELOP_QUOTIENT_BITS(1, 0) | |
270 | Lend_regular_divide: | |
271 | deccc ITER | |
272 | bge Ldivloop | |
273 | tst R | |
274 | bl,a Lgot_result | |
275 | ! non-restoring fixup here (one instruction only!) | |
276 | ifelse(OP, `div', | |
277 | ` dec Q | |
278 | ', ` add R, divisor, R | |
279 | ') | |
280 | ||
281 | Lgot_result: | |
282 | ifelse(S, `true', | |
283 | ` ! check to see if answer should be < 0 | |
284 | tst SIGN | |
285 | bl,a 1f | |
286 | ifelse(OP, `div', `neg Q', `neg R') | |
287 | 1:') | |
288 | retl | |
289 | ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') | |
290 | ||
291 | END(NAME) |