1 | /*
|
---|
2 | (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
---|
3 | See the copyright notice in the ACK home directory, in the file "Copyright".
|
---|
4 | */
|
---|
5 |
|
---|
6 | /* $Header: /cvsup/minix/src/lib/float/div_ext.c,v 1.1.1.1 2005/04/21 14:56:10 beng Exp $ */
|
---|
7 |
|
---|
8 | /*
|
---|
9 | DIVIDE EXTENDED FORMAT
|
---|
10 | */
|
---|
11 |
|
---|
12 | #include "FP_bias.h"
|
---|
13 | #include "FP_trap.h"
|
---|
14 | #include "FP_types.h"
|
---|
15 |
|
---|
16 | /*
|
---|
17 | November 15, 1984
|
---|
18 |
|
---|
19 | This is a routine to do the work.
|
---|
20 | There are two versions:
|
---|
21 | One is based on the partial products method
|
---|
22 | and makes no use possible machine instructions
|
---|
23 | to divide (hardware dividers).
|
---|
24 | The other is used when USE_DIVIDE is defined. It is much faster on
|
---|
25 | machines with fast 4 byte operations.
|
---|
26 | */
|
---|
27 | /********************************************************/
|
---|
28 |
|
---|
29 | void
|
---|
30 | div_ext(e1,e2)
|
---|
31 | EXTEND *e1,*e2;
|
---|
32 | {
|
---|
33 | short error = 0;
|
---|
34 | B64 result;
|
---|
35 | register unsigned long *lp;
|
---|
36 | #ifndef USE_DIVIDE
|
---|
37 | short count;
|
---|
38 | #else
|
---|
39 | unsigned short u[9], v[5];
|
---|
40 | register int j;
|
---|
41 | register unsigned short *u_p = u;
|
---|
42 | int maxv = 4;
|
---|
43 | #endif
|
---|
44 |
|
---|
45 | if ((e2->m1 | e2->m2) == 0) {
|
---|
46 | /*
|
---|
47 | * Exception 8.2 - Divide by zero
|
---|
48 | */
|
---|
49 | trap(EFDIVZ);
|
---|
50 | e1->m1 = e1->m2 = 0L;
|
---|
51 | e1->exp = EXT_MAX;
|
---|
52 | return;
|
---|
53 | }
|
---|
54 | if ((e1->m1 | e1->m2) == 0) { /* 0 / anything == 0 */
|
---|
55 | e1->exp = 0; /* make sure */
|
---|
56 | return;
|
---|
57 | }
|
---|
58 | #ifndef USE_DIVIDE
|
---|
59 | /*
|
---|
60 | * numbers are right shifted one bit to make sure
|
---|
61 | * that m1 is quaranteed to be larger if its
|
---|
62 | * maximum bit is set
|
---|
63 | */
|
---|
64 | b64_rsft(&e1->mantissa); /* 64 bit shift right */
|
---|
65 | b64_rsft(&e2->mantissa); /* 64 bit shift right */
|
---|
66 | e1->exp++;
|
---|
67 | e2->exp++;
|
---|
68 | #endif
|
---|
69 | /* check for underflow, divide by zero, etc */
|
---|
70 | e1->sign ^= e2->sign;
|
---|
71 | e1->exp -= e2->exp;
|
---|
72 |
|
---|
73 | #ifndef USE_DIVIDE
|
---|
74 | /* do division of mantissas */
|
---|
75 | /* uses partial product method */
|
---|
76 | /* init control variables */
|
---|
77 |
|
---|
78 | count = 64;
|
---|
79 | result.h_32 = 0L;
|
---|
80 | result.l_32 = 0L;
|
---|
81 |
|
---|
82 | /* partial product division loop */
|
---|
83 |
|
---|
84 | while (count--) {
|
---|
85 | /* first left shift result 1 bit */
|
---|
86 | /* this is ALWAYS done */
|
---|
87 |
|
---|
88 | b64_lsft(&result);
|
---|
89 |
|
---|
90 | /* compare dividend and divisor */
|
---|
91 | /* if dividend >= divisor add a bit */
|
---|
92 | /* and subtract divisior from dividend */
|
---|
93 |
|
---|
94 | if ( (e1->m1 < e2->m1) ||
|
---|
95 | ((e1->m1 == e2->m1) && (e1->m2 < e2->m2) ))
|
---|
96 | ; /* null statement */
|
---|
97 | /* i.e., don't add or subtract */
|
---|
98 | else {
|
---|
99 | result.l_32++; /* ADD */
|
---|
100 | if (e2->m2 > e1->m2)
|
---|
101 | e1->m1 -= 1; /* carry in */
|
---|
102 | e1->m1 -= e2->m1; /* do SUBTRACTION */
|
---|
103 | e1->m2 -= e2->m2; /* SUBTRACTION */
|
---|
104 | }
|
---|
105 |
|
---|
106 | /* shift dividend left one bit OR */
|
---|
107 | /* IF it equals ZERO we can break out */
|
---|
108 | /* of the loop, but still must shift */
|
---|
109 | /* the quotient the remaining count bits */
|
---|
110 | /* NB save the results of this test in error */
|
---|
111 | /* if not zero, then the result is inexact. */
|
---|
112 | /* this would be reported in IEEE standard */
|
---|
113 |
|
---|
114 | /* lp points to dividend */
|
---|
115 | lp = &e1->m1;
|
---|
116 |
|
---|
117 | error = ((*lp | *(lp+1)) != 0L) ? 1 : 0;
|
---|
118 | if (error) { /* more work */
|
---|
119 | /* assume max bit == 0 (see above) */
|
---|
120 | b64_lsft(&e1->mantissa);
|
---|
121 | continue;
|
---|
122 | }
|
---|
123 | else
|
---|
124 | break; /* leave loop */
|
---|
125 | } /* end of divide by subtraction loop */
|
---|
126 |
|
---|
127 | if (count > 0) {
|
---|
128 | lp = &result.h_32;
|
---|
129 | if (count > 31) { /* move to higher word */
|
---|
130 | *lp = *(lp+1);
|
---|
131 | count -= 32;
|
---|
132 | *(lp+1) = 0L; /* clear low word */
|
---|
133 | }
|
---|
134 | if (*lp)
|
---|
135 | *lp <<= count; /* shift rest of way */
|
---|
136 | lp++; /* == &result.l_32 */
|
---|
137 | if (*lp) {
|
---|
138 | result.h_32 |= (*lp >> 32-count);
|
---|
139 | *lp <<= count;
|
---|
140 | }
|
---|
141 | }
|
---|
142 | #else /* USE_DIVIDE */
|
---|
143 |
|
---|
144 | u[4] = (e1->m2 & 1) << 15;
|
---|
145 | b64_rsft(&(e1->mantissa));
|
---|
146 | u[0] = e1->m1 >> 16;
|
---|
147 | u[1] = e1->m1;
|
---|
148 | u[2] = e1->m2 >> 16;
|
---|
149 | u[3] = e1->m2;
|
---|
150 | u[5] = 0; u[6] = 0; u[7] = 0;
|
---|
151 | v[1] = e2->m1 >> 16;
|
---|
152 | v[2] = e2->m1;
|
---|
153 | v[3] = e2->m2 >> 16;
|
---|
154 | v[4] = e2->m2;
|
---|
155 | while (! v[maxv]) maxv--;
|
---|
156 | result.h_32 = 0;
|
---|
157 | result.l_32 = 0;
|
---|
158 | lp = &result.h_32;
|
---|
159 |
|
---|
160 | /*
|
---|
161 | * Use an algorithm of Knuth (The art of programming, Seminumerical
|
---|
162 | * algorithms), to divide u by v. u and v are both seen as numbers
|
---|
163 | * with base 65536.
|
---|
164 | */
|
---|
165 | for (j = 0; j <= 3; j++, u_p++) {
|
---|
166 | unsigned long q_est, temp;
|
---|
167 |
|
---|
168 | if (j == 2) lp++;
|
---|
169 | if (u_p[0] == 0 && u_p[1] < v[1]) continue;
|
---|
170 | temp = ((unsigned long)u_p[0] << 16) + u_p[1];
|
---|
171 | if (u_p[0] >= v[1]) {
|
---|
172 | q_est = 0x0000FFFFL;
|
---|
173 | }
|
---|
174 | else {
|
---|
175 | q_est = temp / v[1];
|
---|
176 | }
|
---|
177 | temp -= q_est * v[1];
|
---|
178 | while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {
|
---|
179 | q_est--;
|
---|
180 | temp += v[1];
|
---|
181 | }
|
---|
182 | /* Now, according to Knuth, we have an estimate of the
|
---|
183 | quotient, that is either correct or one too big, but
|
---|
184 | almost always correct.
|
---|
185 | */
|
---|
186 | if (q_est != 0) {
|
---|
187 | int i;
|
---|
188 | unsigned long k = 0;
|
---|
189 | int borrow = 0;
|
---|
190 |
|
---|
191 | for (i = maxv; i > 0; i--) {
|
---|
192 | unsigned long tmp = q_est * v[i] + k + borrow;
|
---|
193 | unsigned short md = tmp;
|
---|
194 |
|
---|
195 | borrow = (md > u_p[i]);
|
---|
196 | u_p[i] -= md;
|
---|
197 | k = tmp >> 16;
|
---|
198 | }
|
---|
199 | k += borrow;
|
---|
200 | borrow = u_p[0] < k;
|
---|
201 | u_p[0] -= k;
|
---|
202 |
|
---|
203 | if (borrow) {
|
---|
204 | /* So, this does not happen often; the estimate
|
---|
205 | was one too big; correct this
|
---|
206 | */
|
---|
207 | *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
|
---|
208 | borrow = 0;
|
---|
209 | for (i = maxv; i > 0; i--) {
|
---|
210 | unsigned long tmp
|
---|
211 | = v[i]+(unsigned long)u_p[i]+borrow;
|
---|
212 |
|
---|
213 | u_p[i] = tmp;
|
---|
214 | borrow = tmp >> 16;
|
---|
215 | }
|
---|
216 | u_p[0] += borrow;
|
---|
217 | }
|
---|
218 | else *lp |= (j & 1) ? q_est : (q_est<<16);
|
---|
219 | }
|
---|
220 | }
|
---|
221 | #ifdef EXCEPTION_INEXACT
|
---|
222 | u_p = &u[0];
|
---|
223 | for (j = 7; j >= 0; j--) {
|
---|
224 | if (*u_p++) {
|
---|
225 | error = 1;
|
---|
226 | break;
|
---|
227 | }
|
---|
228 | }
|
---|
229 | #endif
|
---|
230 | #endif
|
---|
231 |
|
---|
232 | #ifdef EXCEPTION_INEXACT
|
---|
233 | if (error) {
|
---|
234 | /*
|
---|
235 | * report here exception 8.5 - Inexact
|
---|
236 | * from Draft 8.0 of IEEE P754:
|
---|
237 | * In the absence of an invalid operation exception,
|
---|
238 | * if the rounded result of an operation is not exact or if
|
---|
239 | * it overflows without a trap, then the inexact exception
|
---|
240 | * shall be assigned. The rounded or overflowed result
|
---|
241 | * shall be delivered to the destination.
|
---|
242 | */
|
---|
243 | INEXACT();
|
---|
244 | #endif
|
---|
245 | e1->mantissa = result;
|
---|
246 |
|
---|
247 | nrm_ext(e1);
|
---|
248 | if (e1->exp < EXT_MIN) {
|
---|
249 | /*
|
---|
250 | * Exception 8.4 - Underflow
|
---|
251 | */
|
---|
252 | trap(EFUNFL); /* underflow */
|
---|
253 | e1->exp = EXT_MIN;
|
---|
254 | e1->m1 = e1->m2 = 0L;
|
---|
255 | return;
|
---|
256 | }
|
---|
257 | if (e1->exp >= EXT_MAX) {
|
---|
258 | /*
|
---|
259 | * Exception 8.3 - Overflow
|
---|
260 | */
|
---|
261 | trap(EFOVFL); /* overflow */
|
---|
262 | e1->exp = EXT_MAX;
|
---|
263 | e1->m1 = e1->m2 = 0L;
|
---|
264 | return;
|
---|
265 | }
|
---|
266 | }
|
---|