@@ -264,7 +264,7 @@ export const __Math_expm1 = (x: number): number => {
264264 return x ;
265265 }
266266
267- // use exp(x) - 1 for large x (perf)
267+ // opt: use exp(x) - 1 for large x
268268 if ( Math . abs ( x ) > 1e-5 ) return Math . exp ( x ) - 1 ;
269269
270270 // Taylor series
@@ -285,7 +285,7 @@ export const __Math_log1p = (x: number): number => {
285285 if ( x == - 1 ) return - Infinity ; // log(0) = -inf
286286 if ( ! Number . isFinite ( x ) ) return x ;
287287
288- // use exp(x) - 1 for large x (perf)
288+ // opt: use exp(x) - 1 for large x
289289 if ( Math . abs ( x ) > 1e-5 ) return Math . log ( 1 + x ) ;
290290
291291 // Taylor series
@@ -459,4 +459,83 @@ export const __Math_atan2 = (y: number, x: number): number => {
459459
460460 if ( y >= 0 ) return Math . atan ( ratio ) + Math . PI ;
461461 return Math . atan ( ratio ) - Math . PI ;
462+ } ;
463+
464+ export const __Math_sumPrecise = ( values : any [ ] ) : number => {
465+ // based on "Fast exact summation using small and large superaccumulators" by Radford M. Neal
466+ // https://arxiv.org/abs/1505.05571
467+ // accuracy is top priority, it is fine for this to be slow(er)
468+
469+ // small superaccumulator uses 67 chunks: (1 << (11 - 5)) + 3
470+ // 11 is the number of exponent bits in IEEE-754 double precision
471+ // 5 is the number of low-order exponent bits stored per chunk
472+ const SMALL_SLOTS : number = 67 ;
473+ const SMALL_MIN : number = - 970 ;
474+ const small : Float64Array = new Float64Array ( SMALL_SLOTS ) ;
475+
476+ // large superaccumulator uses 4096 chunks: 1 << (11 + 1)
477+ // 11 is the number of exponent bits in IEEE-754 double precision
478+ const LARGE_SLOTS : number = 4096 ;
479+ const LARGE_MIN : number = - 1074 ;
480+ const large : Float64Array = new Float64Array ( LARGE_SLOTS ) ;
481+
482+ for ( const _ of values ) {
483+ if ( Porffor . rawType ( _ ) != Porffor . TYPES . number ) throw new TypeError ( 'Math.sumPrecise must have only numbers in values' ) ;
484+
485+ const v : number = _ ;
486+ if ( v == 0 ) continue ;
487+
488+ const exp : number = Porffor . number . getExponent ( v ) ;
489+
490+ // check if value fits in small superaccumulator
491+ if ( exp >= SMALL_MIN && exp < SMALL_MIN + SMALL_SLOTS ) {
492+ // map the exponent to an array index (-970 -> 0, -969 -> 1, etc)
493+ const slot : number = exp - SMALL_MIN ;
494+ let y : number = v ;
495+
496+ // cascade up through slots, similar to carrying digits in decimal
497+ // but operating in binary and handling floating point carefully
498+ for ( let i : number = slot ; i < SMALL_SLOTS - 1 ; i ++ ) {
499+ const sum : number = small [ i ] + y ;
500+ y = sum ;
501+
502+ // a number fits in slot i if its magnitude is less than 2^(i+SMALL_MIN+1)
503+ const slotLimit : number = Math . pow ( 2 , i + SMALL_MIN + 1 ) ;
504+ if ( y >= - slotLimit && y < slotLimit ) {
505+ small [ i ] = y ;
506+ y = 0 ;
507+ break ;
508+ }
509+
510+ // doesn't fit, clear this slot and continue cascading
511+ small [ i ] = 0 ;
512+ }
513+
514+ // if we still have a non-zero value after cascading through small,
515+ // it needs to go into the large superaccumulator
516+ if ( y != 0 ) {
517+ large [ Porffor . number . getExponent ( y ) - LARGE_MIN ] += y ;
518+ }
519+ } else {
520+ // exponent is outside small superaccumulator range,
521+ // put it directly in the large superaccumulator
522+ large [ Porffor . number . getExponent ( v ) - LARGE_MIN ] += v ;
523+ }
524+ }
525+
526+ // combine results from both superaccumulators,
527+ // process from highest to lowest to maintain precision
528+ // todo: handle -0 (see test262 test)
529+ let sum : number = - 0 ;
530+ for ( let i : number = LARGE_SLOTS - 1 ; i >= 0 ; i -- ) {
531+ sum += large [ i ] ;
532+ }
533+
534+ for ( let i : number = SMALL_SLOTS - 1 ; i >= 0 ; i -- ) {
535+ sum += small [ i ] ;
536+ }
537+
538+ // todo: free large and small
539+
540+ return sum ;
462541} ;
0 commit comments