/* (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. See the copyright notice in the ACK home directory, in the file "Copyright". */ /* $Header: /cvsup/minix/src/lib/ack/fphook/div_ext.fc,v 1.1 2005/10/10 15:27:43 beng Exp $ */ /* DIVIDE EXTENDED FORMAT */ #include "FP_bias.h" #include "FP_trap.h" #include "FP_types.h" /* November 15, 1984 This is a routine to do the work. There are two versions: One is based on the partial products method and makes no use possible machine instructions to divide (hardware dividers). The other is used when USE_DIVIDE is defined. It is much faster on machines with fast 4 byte operations. */ /********************************************************/ void div_ext(e1,e2) EXTEND *e1,*e2; { short error = 0; B64 result; register unsigned long *lp; #ifndef USE_DIVIDE short count; #else unsigned short u[9], v[5]; register int j; register unsigned short *u_p = u; int maxv = 4; #endif if ((e2->m1 | e2->m2) == 0) { /* * Exception 8.2 - Divide by zero */ trap(EFDIVZ); e1->m1 = e1->m2 = 0L; e1->exp = EXT_MAX; return; } if ((e1->m1 | e1->m2) == 0) { /* 0 / anything == 0 */ e1->exp = 0; /* make sure */ return; } #ifndef USE_DIVIDE /* * numbers are right shifted one bit to make sure * that m1 is quaranteed to be larger if its * maximum bit is set */ b64_rsft(&e1->mantissa); /* 64 bit shift right */ b64_rsft(&e2->mantissa); /* 64 bit shift right */ e1->exp++; e2->exp++; #endif /* check for underflow, divide by zero, etc */ e1->sign ^= e2->sign; e1->exp -= e2->exp; #ifndef USE_DIVIDE /* do division of mantissas */ /* uses partial product method */ /* init control variables */ count = 64; result.h_32 = 0L; result.l_32 = 0L; /* partial product division loop */ while (count--) { /* first left shift result 1 bit */ /* this is ALWAYS done */ b64_lsft(&result); /* compare dividend and divisor */ /* if dividend >= divisor add a bit */ /* and subtract divisior from dividend */ if ( (e1->m1 < e2->m1) || ((e1->m1 == e2->m1) && (e1->m2 < e2->m2) )) ; /* null statement */ /* i.e., don't add or subtract */ else { result.l_32++; /* ADD */ if (e2->m2 > e1->m2) e1->m1 -= 1; /* carry in */ e1->m1 -= e2->m1; /* do SUBTRACTION */ e1->m2 -= e2->m2; /* SUBTRACTION */ } /* shift dividend left one bit OR */ /* IF it equals ZERO we can break out */ /* of the loop, but still must shift */ /* the quotient the remaining count bits */ /* NB save the results of this test in error */ /* if not zero, then the result is inexact. */ /* this would be reported in IEEE standard */ /* lp points to dividend */ lp = &e1->m1; error = ((*lp | *(lp+1)) != 0L) ? 1 : 0; if (error) { /* more work */ /* assume max bit == 0 (see above) */ b64_lsft(&e1->mantissa); continue; } else break; /* leave loop */ } /* end of divide by subtraction loop */ if (count > 0) { lp = &result.h_32; if (count > 31) { /* move to higher word */ *lp = *(lp+1); count -= 32; *(lp+1) = 0L; /* clear low word */ } if (*lp) *lp <<= count; /* shift rest of way */ lp++; /* == &result.l_32 */ if (*lp) { result.h_32 |= (*lp >> 32-count); *lp <<= count; } } #else /* USE_DIVIDE */ u[4] = (e1->m2 & 1) << 15; b64_rsft(&(e1->mantissa)); u[0] = e1->m1 >> 16; u[1] = e1->m1; u[2] = e1->m2 >> 16; u[3] = e1->m2; u[5] = 0; u[6] = 0; u[7] = 0; v[1] = e2->m1 >> 16; v[2] = e2->m1; v[3] = e2->m2 >> 16; v[4] = e2->m2; while (! v[maxv]) maxv--; result.h_32 = 0; result.l_32 = 0; lp = &result.h_32; /* * Use an algorithm of Knuth (The art of programming, Seminumerical * algorithms), to divide u by v. u and v are both seen as numbers * with base 65536. */ for (j = 0; j <= 3; j++, u_p++) { unsigned long q_est, temp; if (j == 2) lp++; if (u_p[0] == 0 && u_p[1] < v[1]) continue; temp = ((unsigned long)u_p[0] << 16) + u_p[1]; if (u_p[0] >= v[1]) { q_est = 0x0000FFFFL; } else { q_est = temp / v[1]; } temp -= q_est * v[1]; while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) { q_est--; temp += v[1]; } /* Now, according to Knuth, we have an estimate of the quotient, that is either correct or one too big, but almost always correct. */ if (q_est != 0) { int i; unsigned long k = 0; int borrow = 0; for (i = maxv; i > 0; i--) { unsigned long tmp = q_est * v[i] + k + borrow; unsigned short md = tmp; borrow = (md > u_p[i]); u_p[i] -= md; k = tmp >> 16; } k += borrow; borrow = u_p[0] < k; u_p[0] -= k; if (borrow) { /* So, this does not happen often; the estimate was one too big; correct this */ *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); borrow = 0; for (i = maxv; i > 0; i--) { unsigned long tmp = v[i]+(unsigned long)u_p[i]+borrow; u_p[i] = tmp; borrow = tmp >> 16; } u_p[0] += borrow; } else *lp |= (j & 1) ? q_est : (q_est<<16); } } #ifdef EXCEPTION_INEXACT u_p = &u[0]; for (j = 7; j >= 0; j--) { if (*u_p++) { error = 1; break; } } #endif #endif #ifdef EXCEPTION_INEXACT if (error) { /* * report here exception 8.5 - Inexact * from Draft 8.0 of IEEE P754: * In the absence of an invalid operation exception, * if the rounded result of an operation is not exact or if * it overflows without a trap, then the inexact exception * shall be assigned. The rounded or overflowed result * shall be delivered to the destination. */ INEXACT(); #endif e1->mantissa = result; nrm_ext(e1); if (e1->exp < EXT_MIN) { /* * Exception 8.4 - Underflow */ trap(EFUNFL); /* underflow */ e1->exp = EXT_MIN; e1->m1 = e1->m2 = 0L; return; } if (e1->exp >= EXT_MAX) { /* * Exception 8.3 - Overflow */ trap(EFOVFL); /* overflow */ e1->exp = EXT_MAX; e1->m1 = e1->m2 = 0L; return; } }