@@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val,
4545845458 return JS_NewInt32(ctx, r);
4545945459}
4546045460
45461- /* we add one extra limb to avoid having to test for overflows during the sum */
45462- #define SUM_PRECISE_ACC_LEN 34
45463-
4546445461typedef enum {
45465- SUM_PRECISE_STATE_MINUS_ZERO,
4546645462 SUM_PRECISE_STATE_FINITE,
4546745463 SUM_PRECISE_STATE_INFINITY,
4546845464 SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
4546945465 SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
4547045466} SumPreciseStateEnum;
4547145467
45468+ #define SP_LIMB_BITS 56
45469+ #define SP_RND_BITS (SP_LIMB_BITS - 53)
45470+ /* we add one extra limb to avoid having to test for overflows during the sum */
45471+ #define SUM_PRECISE_ACC_LEN 39
45472+
45473+ #define SUM_PRECISE_COUNTER_INIT 250
45474+
4547245475typedef struct {
45473- uint64_t acc[SUM_PRECISE_ACC_LEN];
45474- int n_limbs; /* acc is not necessarily normalized */
4547545476 SumPreciseStateEnum state;
45477+ uint32_t counter;
45478+ int n_limbs; /* 'acc' contains n_limbs and is not necessarily
45479+ acc[n_limb - 1] may be 0. 0 indicates minus zero
45480+ result when state = SUM_PRECISE_STATE_FINITE */
45481+ int64_t acc[SUM_PRECISE_ACC_LEN];
4547645482} SumPreciseState;
4547745483
4547845484static void sum_precise_init(SumPreciseState *s)
4547945485{
45480- s->state = SUM_PRECISE_STATE_MINUS_ZERO;
45481- s->acc[0] = 0;
45482- s->n_limbs = 1;
45486+ memset(s->acc, 0, sizeof(s->acc));
45487+ s->state = SUM_PRECISE_STATE_FINITE;
45488+ s->counter = SUM_PRECISE_COUNTER_INIT;
45489+ s->n_limbs = 0;
4548345490}
4548445491
45485- #define ADDC64(res, carry_out, op1, op2, carry_in) \
45486- do { \
45487- uint64_t __v, __a, __k, __k1; \
45488- __v = (op1); \
45489- __a = __v + (op2); \
45490- __k1 = __a < __v; \
45491- __k = (carry_in); \
45492- __a = __a + __k; \
45493- carry_out = (__a < __k) | __k1; \
45494- res = __a; \
45495- } while (0)
45492+ static void sum_precise_renorm(SumPreciseState *s)
45493+ {
45494+ int64_t v, carry;
45495+ int i;
45496+
45497+ carry = 0;
45498+ for(i = 0; i < s->n_limbs; i++) {
45499+ v = s->acc[i] + carry;
45500+ s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45501+ carry = v >> SP_LIMB_BITS;
45502+ }
45503+ /* we add a failsafe but it should be never reached in a
45504+ reasonnable amount of time */
45505+ if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
45506+ s->acc[s->n_limbs++] = carry;
45507+ }
4549645508
4549745509static void sum_precise_add(SumPreciseState *s, double d)
4549845510{
45499- uint64_t a, m, a0, carry, acc_sign, a_sign ;
45500- int sgn, e, p, n, i ;
45501- unsigned shift;
45511+ uint64_t a, m, a0, a1 ;
45512+ int sgn, e, p;
45513+ unsigned int shift;
4550245514
4550345515 a = float64_as_uint64(d);
4550445516 sgn = a >> 63;
@@ -45521,78 +45533,50 @@ static void sum_precise_add(SumPreciseState *s, double d)
4552145533 } else if (e == 0) {
4552245534 if (likely(m == 0)) {
4552345535 /* zero */
45524- if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn)
45525- s->state = SUM_PRECISE_STATE_FINITE ;
45536+ if (s->n_limbs == 0 && !sgn)
45537+ s->n_limbs = 1 ;
4552645538 } else {
4552745539 /* subnormal */
4552845540 p = 0;
4552945541 shift = 0;
4553045542 goto add;
4553145543 }
4553245544 } else {
45545+ /* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
45546+ avoid tests */
4553345547 m |= (uint64_t)1 << 52;
4553445548 shift = e - 1;
45535- p = shift / 64;
45536- /* 'p' is the position of a0 in acc */
45537- shift %= 64;
45549+ /* 'p' is the position of a0 in acc. The division is normally
45550+ implementation as a multiplication by the compiler. */
45551+ p = shift / SP_LIMB_BITS;
45552+ shift %= SP_LIMB_BITS;
4553845553 add:
45539- if (s->state >= SUM_PRECISE_STATE_INFINITY)
45540- return;
45541- s->state = SUM_PRECISE_STATE_FINITE;
45542- n = s->n_limbs;
45543-
45544- acc_sign = (int64_t)s->acc[n - 1] >> 63;
45545-
45546- /* sign extend acc */
45547- for(i = n; i <= p; i++)
45548- s->acc[i] = acc_sign;
45549-
45550- carry = sgn;
45551- a_sign = -sgn;
45552- a0 = m << shift;
45553- ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
45554- if (shift >= 12) {
45555- p++;
45556- if (p >= n)
45557- s->acc[p] = acc_sign;
45558- a0 = m >> (64 - shift);
45559- ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
45560- }
45561- p++;
45562- if (p >= n) {
45563- n = p;
45554+ a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45555+ a1 = m >> (SP_LIMB_BITS - shift);
45556+ if (!sgn) {
45557+ s->acc[p] += a0;
45558+ s->acc[p + 1] += a1;
4556445559 } else {
45565- /* carry */
45566- for(i = p; i < n; i++) {
45567- /* if 'a' positive: stop condition: carry = 0.
45568- if 'a' negative: stop condition: carry = 1. */
45569- if (carry == sgn)
45570- goto done;
45571- ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
45572- }
45560+ s->acc[p] -= a0;
45561+ s->acc[p + 1] -= a1;
4557345562 }
45574-
45575- /* extend the accumulator if needed */
45576- a0 = carry + acc_sign + a_sign;
45577- /* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */
45578- if (a0 != ((int64_t)s->acc[n - 1] >> 63)) {
45579- s->acc[n++] = a0;
45563+ s->n_limbs = max_int(s->n_limbs, p + 2);
45564+
45565+ if (unlikely(--s->counter == 0)) {
45566+ s->counter = SUM_PRECISE_COUNTER_INIT;
45567+ sum_precise_renorm(s);
4558045568 }
45581- done:
45582- s->n_limbs = n;
4558345569 }
4558445570}
4558545571
4558645572static double sum_precise_get_result(SumPreciseState *s)
4558745573{
45588- int n, shift, e, p, is_neg, i ;
45589- uint64_t m, addend, carry ;
45574+ int n, shift, e, p, is_neg;
45575+ uint64_t m, addend;
4559045576
4559145577 if (s->state != SUM_PRECISE_STATE_FINITE) {
4559245578 switch(s->state) {
4559345579 default:
45594- case SUM_PRECISE_STATE_MINUS_ZERO:
45595- return -0.0;
4559645580 case SUM_PRECISE_STATE_INFINITY:
4559745581 return INFINITY;
4559845582 case SUM_PRECISE_STATE_MINUS_INFINITY:
@@ -45602,51 +45586,66 @@ static double sum_precise_get_result(SumPreciseState *s)
4560245586 }
4560345587 }
4560445588
45589+ sum_precise_renorm(s);
45590+
4560545591 /* extract the sign and absolute value */
45606- n = s->n_limbs;
45607- is_neg = s->acc[n - 1] >> 63;
45608- if (is_neg) {
45609- /* acc = -acc */
45610- carry = 1;
45611- for(i = 0; i < n; i++) {
45612- ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
45613- }
45614- }
45615- /* normalize */
45616- while (n > 0 && s->acc[n - 1] == 0)
45617- n--;
4561845592#if 0
4561945593 {
45620- printf("res=");
45621- for(i = n - 1; i >= 0; i--)
45622- printf(" %016lx", s->acc[i]);
45594+ int i;
45595+ printf("len=%d:", s->n_limbs);
45596+ for(i = s->n_limbs - 1; i >= 0; i--)
45597+ printf(" %014lx", s->acc[i]);
4562345598 printf("\n");
4562445599 }
4562545600#endif
45601+ n = s->n_limbs;
45602+ /* minus zero result */
45603+ if (n == 0)
45604+ return -0.0;
45605+
45606+ /* normalize */
45607+ while (n > 0 && s->acc[n - 1] == 0)
45608+ n--;
4562645609 /* zero result. The spec tells it is always positive in the finite case */
4562745610 if (n == 0)
4562845611 return 0.0;
45612+ is_neg = (s->acc[n - 1] < 0);
45613+ if (is_neg) {
45614+ uint64_t v, carry;
45615+ int i;
45616+ /* negate */
45617+ /* XXX: do it only when needed */
45618+ carry = 1;
45619+ for(i = 0; i < n - 1; i++) {
45620+ v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
45621+ carry = v >> SP_LIMB_BITS;
45622+ s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45623+ }
45624+ s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
45625+ while (n > 1 && s->acc[n - 1] == 0)
45626+ n--;
45627+ }
4562945628 /* subnormal case */
4563045629 if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
4563145630 return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
4563245631 /* normal case */
45633- e = n * 64 ;
45632+ e = n * SP_LIMB_BITS ;
4563445633 p = n - 1;
4563545634 m = s->acc[p];
45636- shift = clz64(m);
45635+ shift = clz64(m) - (64 - SP_LIMB_BITS) ;
4563745636 e = e - shift - 52;
4563845637 if (shift != 0) {
4563945638 m <<= shift;
4564045639 if (p > 0) {
4564145640 int shift1;
4564245641 uint64_t nz;
4564345642 p--;
45644- shift1 = 64 - shift;
45643+ shift1 = SP_LIMB_BITS - shift;
4564545644 nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
4564645645 m = m | (s->acc[p] >> shift1) | (nz != 0);
4564745646 }
4564845647 }
45649- if ((m & ((1 << 10 ) - 1)) == 0 ) {
45648+ if ((m & ((1 << SP_RND_BITS ) - 1)) == (1 << (SP_RND_BITS - 1)) ) {
4565045649 /* see if the LSB part is non zero for the final rounding */
4565145650 while (p > 0) {
4565245651 p--;
@@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s)
4565745656 }
4565845657 }
4565945658 /* rounding to nearest with ties to even */
45660- addend = (1 << 10) - 1 + ((m >> 11 ) & 1);
45661- m = (m + addend) >> 11 ;
45659+ addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS ) & 1);
45660+ m = (m + addend) >> SP_RND_BITS ;
4566245661 /* handle overflow in the rounding */
45663- if (m == 0 )
45662+ if (m == ((uint64_t)1 << 53) )
4566445663 e++;
4566545664 if (unlikely(e >= 2047)) {
4566645665 /* infinity */
0 commit comments