[9] | 1 | /*
|
---|
| 2 | (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
---|
| 3 | See the copyright notice in the ACK home directory, in the file "Copyright".
|
---|
| 4 | */
|
---|
| 5 |
|
---|
| 6 | /* $Header: /cvsup/minix/src/lib/ack/float/div_ext.fc,v 1.1 2005/10/10 15:27:42 beng Exp $ */
|
---|
| 7 |
|
---|
| 8 | /*
|
---|
| 9 | DIVIDE EXTENDED FORMAT
|
---|
| 10 | */
|
---|
| 11 |
|
---|
| 12 | #include "FP_bias.h"
|
---|
| 13 | #include "FP_trap.h"
|
---|
| 14 | #include "FP_types.h"
|
---|
| 15 |
|
---|
| 16 | /*
|
---|
| 17 | November 15, 1984
|
---|
| 18 |
|
---|
| 19 | This is a routine to do the work.
|
---|
| 20 | There are two versions:
|
---|
| 21 | One is based on the partial products method
|
---|
| 22 | and makes no use possible machine instructions
|
---|
| 23 | to divide (hardware dividers).
|
---|
| 24 | The other is used when USE_DIVIDE is defined. It is much faster on
|
---|
| 25 | machines with fast 4 byte operations.
|
---|
| 26 | */
|
---|
| 27 | /********************************************************/
|
---|
| 28 |
|
---|
| 29 | void
|
---|
| 30 | div_ext(e1,e2)
|
---|
| 31 | EXTEND *e1,*e2;
|
---|
| 32 | {
|
---|
| 33 | short error = 0;
|
---|
| 34 | B64 result;
|
---|
| 35 | register unsigned long *lp;
|
---|
| 36 | #ifndef USE_DIVIDE
|
---|
| 37 | short count;
|
---|
| 38 | #else
|
---|
| 39 | unsigned short u[9], v[5];
|
---|
| 40 | register int j;
|
---|
| 41 | register unsigned short *u_p = u;
|
---|
| 42 | int maxv = 4;
|
---|
| 43 | #endif
|
---|
| 44 |
|
---|
| 45 | if ((e2->m1 | e2->m2) == 0) {
|
---|
| 46 | /*
|
---|
| 47 | * Exception 8.2 - Divide by zero
|
---|
| 48 | */
|
---|
| 49 | trap(EFDIVZ);
|
---|
| 50 | e1->m1 = e1->m2 = 0L;
|
---|
| 51 | e1->exp = EXT_MAX;
|
---|
| 52 | return;
|
---|
| 53 | }
|
---|
| 54 | if ((e1->m1 | e1->m2) == 0) { /* 0 / anything == 0 */
|
---|
| 55 | e1->exp = 0; /* make sure */
|
---|
| 56 | return;
|
---|
| 57 | }
|
---|
| 58 | #ifndef USE_DIVIDE
|
---|
| 59 | /*
|
---|
| 60 | * numbers are right shifted one bit to make sure
|
---|
| 61 | * that m1 is quaranteed to be larger if its
|
---|
| 62 | * maximum bit is set
|
---|
| 63 | */
|
---|
| 64 | b64_rsft(&e1->mantissa); /* 64 bit shift right */
|
---|
| 65 | b64_rsft(&e2->mantissa); /* 64 bit shift right */
|
---|
| 66 | e1->exp++;
|
---|
| 67 | e2->exp++;
|
---|
| 68 | #endif
|
---|
| 69 | /* check for underflow, divide by zero, etc */
|
---|
| 70 | e1->sign ^= e2->sign;
|
---|
| 71 | e1->exp -= e2->exp;
|
---|
| 72 |
|
---|
| 73 | #ifndef USE_DIVIDE
|
---|
| 74 | /* do division of mantissas */
|
---|
| 75 | /* uses partial product method */
|
---|
| 76 | /* init control variables */
|
---|
| 77 |
|
---|
| 78 | count = 64;
|
---|
| 79 | result.h_32 = 0L;
|
---|
| 80 | result.l_32 = 0L;
|
---|
| 81 |
|
---|
| 82 | /* partial product division loop */
|
---|
| 83 |
|
---|
| 84 | while (count--) {
|
---|
| 85 | /* first left shift result 1 bit */
|
---|
| 86 | /* this is ALWAYS done */
|
---|
| 87 |
|
---|
| 88 | b64_lsft(&result);
|
---|
| 89 |
|
---|
| 90 | /* compare dividend and divisor */
|
---|
| 91 | /* if dividend >= divisor add a bit */
|
---|
| 92 | /* and subtract divisior from dividend */
|
---|
| 93 |
|
---|
| 94 | if ( (e1->m1 < e2->m1) ||
|
---|
| 95 | ((e1->m1 == e2->m1) && (e1->m2 < e2->m2) ))
|
---|
| 96 | ; /* null statement */
|
---|
| 97 | /* i.e., don't add or subtract */
|
---|
| 98 | else {
|
---|
| 99 | result.l_32++; /* ADD */
|
---|
| 100 | if (e2->m2 > e1->m2)
|
---|
| 101 | e1->m1 -= 1; /* carry in */
|
---|
| 102 | e1->m1 -= e2->m1; /* do SUBTRACTION */
|
---|
| 103 | e1->m2 -= e2->m2; /* SUBTRACTION */
|
---|
| 104 | }
|
---|
| 105 |
|
---|
| 106 | /* shift dividend left one bit OR */
|
---|
| 107 | /* IF it equals ZERO we can break out */
|
---|
| 108 | /* of the loop, but still must shift */
|
---|
| 109 | /* the quotient the remaining count bits */
|
---|
| 110 | /* NB save the results of this test in error */
|
---|
| 111 | /* if not zero, then the result is inexact. */
|
---|
| 112 | /* this would be reported in IEEE standard */
|
---|
| 113 |
|
---|
| 114 | /* lp points to dividend */
|
---|
| 115 | lp = &e1->m1;
|
---|
| 116 |
|
---|
| 117 | error = ((*lp | *(lp+1)) != 0L) ? 1 : 0;
|
---|
| 118 | if (error) { /* more work */
|
---|
| 119 | /* assume max bit == 0 (see above) */
|
---|
| 120 | b64_lsft(&e1->mantissa);
|
---|
| 121 | continue;
|
---|
| 122 | }
|
---|
| 123 | else
|
---|
| 124 | break; /* leave loop */
|
---|
| 125 | } /* end of divide by subtraction loop */
|
---|
| 126 |
|
---|
| 127 | if (count > 0) {
|
---|
| 128 | lp = &result.h_32;
|
---|
| 129 | if (count > 31) { /* move to higher word */
|
---|
| 130 | *lp = *(lp+1);
|
---|
| 131 | count -= 32;
|
---|
| 132 | *(lp+1) = 0L; /* clear low word */
|
---|
| 133 | }
|
---|
| 134 | if (*lp)
|
---|
| 135 | *lp <<= count; /* shift rest of way */
|
---|
| 136 | lp++; /* == &result.l_32 */
|
---|
| 137 | if (*lp) {
|
---|
| 138 | result.h_32 |= (*lp >> 32-count);
|
---|
| 139 | *lp <<= count;
|
---|
| 140 | }
|
---|
| 141 | }
|
---|
| 142 | #else /* USE_DIVIDE */
|
---|
| 143 |
|
---|
| 144 | u[4] = (e1->m2 & 1) << 15;
|
---|
| 145 | b64_rsft(&(e1->mantissa));
|
---|
| 146 | u[0] = e1->m1 >> 16;
|
---|
| 147 | u[1] = e1->m1;
|
---|
| 148 | u[2] = e1->m2 >> 16;
|
---|
| 149 | u[3] = e1->m2;
|
---|
| 150 | u[5] = 0; u[6] = 0; u[7] = 0;
|
---|
| 151 | v[1] = e2->m1 >> 16;
|
---|
| 152 | v[2] = e2->m1;
|
---|
| 153 | v[3] = e2->m2 >> 16;
|
---|
| 154 | v[4] = e2->m2;
|
---|
| 155 | while (! v[maxv]) maxv--;
|
---|
| 156 | result.h_32 = 0;
|
---|
| 157 | result.l_32 = 0;
|
---|
| 158 | lp = &result.h_32;
|
---|
| 159 |
|
---|
| 160 | /*
|
---|
| 161 | * Use an algorithm of Knuth (The art of programming, Seminumerical
|
---|
| 162 | * algorithms), to divide u by v. u and v are both seen as numbers
|
---|
| 163 | * with base 65536.
|
---|
| 164 | */
|
---|
| 165 | for (j = 0; j <= 3; j++, u_p++) {
|
---|
| 166 | unsigned long q_est, temp;
|
---|
| 167 |
|
---|
| 168 | if (j == 2) lp++;
|
---|
| 169 | if (u_p[0] == 0 && u_p[1] < v[1]) continue;
|
---|
| 170 | temp = ((unsigned long)u_p[0] << 16) + u_p[1];
|
---|
| 171 | if (u_p[0] >= v[1]) {
|
---|
| 172 | q_est = 0x0000FFFFL;
|
---|
| 173 | }
|
---|
| 174 | else {
|
---|
| 175 | q_est = temp / v[1];
|
---|
| 176 | }
|
---|
| 177 | temp -= q_est * v[1];
|
---|
| 178 | while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {
|
---|
| 179 | q_est--;
|
---|
| 180 | temp += v[1];
|
---|
| 181 | }
|
---|
| 182 | /* Now, according to Knuth, we have an estimate of the
|
---|
| 183 | quotient, that is either correct or one too big, but
|
---|
| 184 | almost always correct.
|
---|
| 185 | */
|
---|
| 186 | if (q_est != 0) {
|
---|
| 187 | int i;
|
---|
| 188 | unsigned long k = 0;
|
---|
| 189 | int borrow = 0;
|
---|
| 190 |
|
---|
| 191 | for (i = maxv; i > 0; i--) {
|
---|
| 192 | unsigned long tmp = q_est * v[i] + k + borrow;
|
---|
| 193 | unsigned short md = tmp;
|
---|
| 194 |
|
---|
| 195 | borrow = (md > u_p[i]);
|
---|
| 196 | u_p[i] -= md;
|
---|
| 197 | k = tmp >> 16;
|
---|
| 198 | }
|
---|
| 199 | k += borrow;
|
---|
| 200 | borrow = u_p[0] < k;
|
---|
| 201 | u_p[0] -= k;
|
---|
| 202 |
|
---|
| 203 | if (borrow) {
|
---|
| 204 | /* So, this does not happen often; the estimate
|
---|
| 205 | was one too big; correct this
|
---|
| 206 | */
|
---|
| 207 | *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
|
---|
| 208 | borrow = 0;
|
---|
| 209 | for (i = maxv; i > 0; i--) {
|
---|
| 210 | unsigned long tmp
|
---|
| 211 | = v[i]+(unsigned long)u_p[i]+borrow;
|
---|
| 212 |
|
---|
| 213 | u_p[i] = tmp;
|
---|
| 214 | borrow = tmp >> 16;
|
---|
| 215 | }
|
---|
| 216 | u_p[0] += borrow;
|
---|
| 217 | }
|
---|
| 218 | else *lp |= (j & 1) ? q_est : (q_est<<16);
|
---|
| 219 | }
|
---|
| 220 | }
|
---|
| 221 | #ifdef EXCEPTION_INEXACT
|
---|
| 222 | u_p = &u[0];
|
---|
| 223 | for (j = 7; j >= 0; j--) {
|
---|
| 224 | if (*u_p++) {
|
---|
| 225 | error = 1;
|
---|
| 226 | break;
|
---|
| 227 | }
|
---|
| 228 | }
|
---|
| 229 | #endif
|
---|
| 230 | #endif
|
---|
| 231 |
|
---|
| 232 | #ifdef EXCEPTION_INEXACT
|
---|
| 233 | if (error) {
|
---|
| 234 | /*
|
---|
| 235 | * report here exception 8.5 - Inexact
|
---|
| 236 | * from Draft 8.0 of IEEE P754:
|
---|
| 237 | * In the absence of an invalid operation exception,
|
---|
| 238 | * if the rounded result of an operation is not exact or if
|
---|
| 239 | * it overflows without a trap, then the inexact exception
|
---|
| 240 | * shall be assigned. The rounded or overflowed result
|
---|
| 241 | * shall be delivered to the destination.
|
---|
| 242 | */
|
---|
| 243 | INEXACT();
|
---|
| 244 | #endif
|
---|
| 245 | e1->mantissa = result;
|
---|
| 246 |
|
---|
| 247 | nrm_ext(e1);
|
---|
| 248 | if (e1->exp < EXT_MIN) {
|
---|
| 249 | /*
|
---|
| 250 | * Exception 8.4 - Underflow
|
---|
| 251 | */
|
---|
| 252 | trap(EFUNFL); /* underflow */
|
---|
| 253 | e1->exp = EXT_MIN;
|
---|
| 254 | e1->m1 = e1->m2 = 0L;
|
---|
| 255 | return;
|
---|
| 256 | }
|
---|
| 257 | if (e1->exp >= EXT_MAX) {
|
---|
| 258 | /*
|
---|
| 259 | * Exception 8.3 - Overflow
|
---|
| 260 | */
|
---|
| 261 | trap(EFOVFL); /* overflow */
|
---|
| 262 | e1->exp = EXT_MAX;
|
---|
| 263 | e1->m1 = e1->m2 = 0L;
|
---|
| 264 | return;
|
---|
| 265 | }
|
---|
| 266 | }
|
---|