@@ -627,4 +627,236 @@ template <typename Ty> Ty __fp_mul(Ty x, Ty y, int rd) {
627627 Ty, (z_sig << (sizeof (Ty) * 8 - 1 )) |
628628 (z_exp << (std::numeric_limits<Ty>::digits - 1 )) | z_fra);
629629}
630+
631+ template <typename UTy> static UTy fra_uint_div (UTy x, UTy y, unsigned nbits) {
632+ UTy res = 0 ;
633+ unsigned iters = 0 ;
634+ if (x == 0 )
635+ return 0x0 ;
636+ while (iters < nbits) {
637+ res = res << 1 ;
638+ x = x << 1 ;
639+ if (x > y) {
640+ x = x - y;
641+ res = res | 0x1 ;
642+ } else if (x == y) {
643+ res = res | 0x1 ;
644+ res = res << (nbits - iters - 1 );
645+ return res;
646+ } else {
647+ }
648+ iters++;
649+ }
650+ res = res | 0x1 ;
651+ return res;
652+ }
653+
654+ template <typename Ty> Ty __fp_div (Ty x, Ty y, int rd) {
655+ typedef typename __iml_fp_config<Ty>::utype UTy;
656+ typedef typename __iml_fp_config<Ty>::stype STy;
657+ UTy x_bit = __builtin_bit_cast (UTy, x);
658+ UTy y_bit = __builtin_bit_cast (UTy, y);
659+ UTy x_exp = (x_bit >> (std::numeric_limits<Ty>::digits - 1 )) &
660+ __iml_fp_config<Ty>::exp_mask;
661+ UTy y_exp = (y_bit >> (std::numeric_limits<Ty>::digits - 1 )) &
662+ __iml_fp_config<Ty>::exp_mask;
663+ UTy x_fra = x_bit & __iml_fp_config<Ty>::fra_mask;
664+ UTy y_fra = y_bit & __iml_fp_config<Ty>::fra_mask;
665+ UTy x_sig = x_bit >> ((sizeof (Ty) * 8 ) - 1 );
666+ UTy y_sig = y_bit >> ((sizeof (Ty) * 8 ) - 1 );
667+ UTy z_sig = x_sig ^ y_sig;
668+ UTy z_exp = 0x0 , z_fra = 0x0 ;
669+ const UTy one_bits = 0x1 ;
670+ const UTy sig_off_mask = (one_bits << (sizeof (UTy) * 8 - 1 )) - 1 ;
671+
672+ if (((x_exp == __iml_fp_config<Ty>::exp_mask) && (x_fra != 0x0 )) ||
673+ ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra != 0x0 )) ||
674+ ((y_bit & sig_off_mask) == 0x0 )) {
675+ UTy tmp = __iml_fp_config<Ty>::nan_bits;
676+ return __builtin_bit_cast (Ty, tmp);
677+ }
678+
679+ if ((x_exp == __iml_fp_config<Ty>::exp_mask) && (x_fra == 0x0 )) {
680+ if ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra == 0x0 )) {
681+ UTy tmp = __iml_fp_config<Ty>::nan_bits;
682+ return __builtin_bit_cast (Ty, tmp);
683+ } else {
684+ UTy tmp =
685+ (z_sig << (sizeof (Ty) * 8 - 1 )) | __iml_fp_config<Ty>::pos_inf_bits;
686+ return __builtin_bit_cast (Ty, tmp);
687+ }
688+ }
689+
690+ if ((x_bit & sig_off_mask) == 0x0 )
691+ return __builtin_bit_cast (Ty, (z_sig << (sizeof (UTy) * 8 - 1 )) | 0x0 );
692+
693+ if ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra == 0x0 ))
694+ return __builtin_bit_cast (Ty, (z_sig << (sizeof (UTy) * 8 - 1 )) | 0x0 );
695+
696+ int sx_exp = x_exp, sy_exp = y_exp;
697+ sx_exp = (sx_exp == 0 ) ? (1 - __iml_fp_config<Ty>::bias)
698+ : (sx_exp - __iml_fp_config<Ty>::bias);
699+ sy_exp = (sy_exp == 0 ) ? (1 - __iml_fp_config<Ty>::bias)
700+ : (sy_exp - __iml_fp_config<Ty>::bias);
701+ int exp_diff = sx_exp - sy_exp;
702+ if (x_exp != 0x0 )
703+ x_fra = (one_bits << (std::numeric_limits<Ty>::digits - 1 )) | x_fra;
704+ if (y_exp != 0x0 )
705+ y_fra = (one_bits << (std::numeric_limits<Ty>::digits - 1 )) | y_fra;
706+
707+ if (x_fra >= y_fra) {
708+ // x_fra / y_fra max value for fp32 is 0xFFFFFF when x is normal
709+ // and y is subnormal, so msb_pos max value is 23
710+ UTy tmp = x_fra / y_fra;
711+ UTy fra_rem = x_fra - y_fra * tmp;
712+ int msb_pos = get_msb_pos (tmp);
713+ int tmp2 = exp_diff + msb_pos;
714+ if (tmp2 > __iml_fp_config<Ty>::bias)
715+ return __handling_fp_overflow<Ty>(z_sig, rd);
716+
717+ if (tmp2 >= (1 - __iml_fp_config<Ty>::bias)) {
718+ // Fall into normal floating point range
719+ z_exp = tmp2 + __iml_fp_config<Ty>::bias;
720+ // For fp32, starting msb_pos bits in fra comes from tmp and we need
721+ // 23 - msb_pos( + grs) more bits from fraction division.
722+ z_fra = ((one_bits << msb_pos) - 1 ) & tmp;
723+ z_fra = z_fra << ((std::numeric_limits<Ty>::digits - 1 ) - msb_pos);
724+ UTy fra_bits_quo = fra_uint_div (
725+ fra_rem, y_fra, std::numeric_limits<Ty>::digits - msb_pos + 2 );
726+ z_fra = z_fra | (fra_bits_quo >> 3 );
727+ int rb = __handling_rounding (z_sig, z_fra, fra_bits_quo & 0x7 , rd);
728+ if (rb != 0 ) {
729+ z_fra++;
730+ if (z_fra > __iml_fp_config<Ty>::fra_mask) {
731+ z_exp++;
732+ if (z_exp == __iml_fp_config<Ty>::exp_mask)
733+ return __handling_fp_overflow<Ty>(z_sig, rd);
734+ }
735+ }
736+ return __builtin_bit_cast (
737+ Ty, (z_sig << (sizeof (Ty) * 8 - 1 )) |
738+ (z_exp << (std::numeric_limits<Ty>::digits - 1 )) | z_fra);
739+ }
740+
741+ // orignal value can be represented as (0.1xxxx.... * 2^tmp2)
742+ // which is equivalent to 0.00000...1xxxxx * 2^(-126)
743+ tmp2 = tmp2 + 1 ;
744+ if ((tmp2 + std::numeric_limits<Ty>::digits - 1 ) <=
745+ (1 - __iml_fp_config<Ty>::bias)) {
746+ bool above_half = false ;
747+ if ((tmp2 + std::numeric_limits<Ty>::digits - 1 ) ==
748+ (1 - __iml_fp_config<Ty>::bias))
749+ above_half =
750+ !((x_fra == y_fra * tmp) && (tmp == (one_bits << msb_pos)));
751+ return __handling_fp_underflow<Ty, UTy>(z_sig, rd, above_half);
752+ } else {
753+ int rb;
754+ // Fall into subnormal floating point range. For fp32, there are -126 -
755+ // tmp2 leading zeros in final fra and we need get 23 + 126 + tmp2( + grs)
756+ // bits from fraction division.
757+ if (msb_pos >= (std::numeric_limits<Ty>::digits +
758+ __iml_fp_config<Ty>::bias + tmp2)) {
759+ unsigned fra_discard_bits = msb_pos + 3 - __iml_fp_config<Ty>::bias -
760+ std::numeric_limits<Ty>::digits - tmp2;
761+ z_fra = tmp >> fra_discard_bits;
762+ int grs_bits = (tmp >> (fra_discard_bits - 3 )) & 0x7 ;
763+ if ((grs_bits & 0x1 ) == 0x0 ) {
764+ if ((tmp & ((0x1 << (fra_discard_bits - 3 )) - 0x1 )) || (fra_rem != 0 ))
765+ grs_bits = grs_bits | 0x1 ;
766+ }
767+ rb = __handling_rounding (z_sig, z_fra, grs_bits, rd);
768+ } else {
769+ // For fp32, we need to get (23 + 126 + tmp2 + 3) - (msb_pos + 1) bits
770+ // from fra division and the last bit is sticky bit.
771+ z_fra = tmp;
772+ unsigned fra_get_bits = std::numeric_limits<Ty>::digits +
773+ __iml_fp_config<Ty>::bias + tmp2 - msb_pos;
774+ z_fra = z_fra << fra_get_bits;
775+ UTy fra_bits_quo = fra_uint_div (fra_rem, y_fra, fra_get_bits);
776+ z_fra = z_fra | fra_bits_quo;
777+ int grs_bits = z_fra & 0x7 ;
778+ z_fra = z_fra >> 3 ;
779+ rb = __handling_rounding (z_sig, z_fra, grs_bits, rd);
780+ }
781+ if (rb != 0 ) {
782+ z_fra++;
783+ if (z_fra > __iml_fp_config<Ty>::fra_mask) {
784+ z_exp++;
785+ z_fra = 0x0 ;
786+ }
787+ }
788+ return __builtin_bit_cast (
789+ Ty, (z_sig << (sizeof (Ty) * 8 - 1 )) |
790+ (z_exp << (std::numeric_limits<Ty>::digits - 1 )) | z_fra);
791+ }
792+ } else {
793+ // x_fra < y_fra, the final result can be represented as
794+ // (2^exp_diff) * 0.000...01xxxxx
795+ unsigned lz = 0 ;
796+ UTy x_tmp = x_fra;
797+ x_tmp = x_tmp << 1 ;
798+ while (x_tmp < y_fra) {
799+ lz++;
800+ x_tmp = x_tmp << 1 ;
801+ }
802+ // x_fra < y_fra, the final result can be represented as
803+ // (2^exp_diff) * 0.000...01xxxxx... which is equivalent to
804+ // 2 ^ (exp_diff - lz - 1) * 1.xxxxx...
805+ int nor_exp = exp_diff - lz - 1 ;
806+ if (nor_exp > __iml_fp_config<Ty>::bias)
807+ return __handling_fp_overflow<Ty>(z_sig, rd);
808+
809+ if (nor_exp >= (1 - __iml_fp_config<Ty>::bias)) {
810+ z_exp = nor_exp + __iml_fp_config<Ty>::bias;
811+ x_fra = x_fra << lz;
812+ UTy fra_bits_quo =
813+ fra_uint_div (x_fra, y_fra, 3 + std::numeric_limits<Ty>::digits);
814+ z_fra = (fra_bits_quo >> 3 ) & __iml_fp_config<Ty>::fra_mask;
815+ int grs_bits = fra_bits_quo & 0x7 ;
816+ int rb = __handling_rounding (z_sig, z_fra, grs_bits, rd);
817+ if (rb != 0x0 ) {
818+ z_fra++;
819+ if (z_fra > __iml_fp_config<Ty>::fra_mask) {
820+ z_exp++;
821+ z_fra = 0x0 ;
822+ if (z_exp == __iml_fp_config<Ty>::exp_mask)
823+ return __handling_fp_overflow<Ty>(z_sig, rd);
824+ }
825+ }
826+ return __builtin_bit_cast (
827+ Ty, (z_sig << (sizeof (Ty) * 8 - 1 )) |
828+ (z_exp << (std::numeric_limits<Ty>::digits - 1 )) | z_fra);
829+ }
830+
831+ // Fall into subnormal range or underflow happens. For fp32,
832+ // nor_exp < -126, so (-126 - exp_diff + lz + 1) > 0 which means
833+ // (lz - exp_diff - 126) >= 0
834+ unsigned lzs = lz - __iml_fp_config<Ty>::bias - exp_diff + 1 ;
835+ if (lzs >= (std::numeric_limits<Ty>::digits - 1 )) {
836+ bool above_half = false ;
837+ if (lzs == (std::numeric_limits<Ty>::digits - 1 )) {
838+ if ((x_fra << (lz + 1 )) > y_fra)
839+ above_half = true ;
840+ }
841+ return __handling_fp_underflow<Ty>(z_sig, rd, above_half);
842+ } else {
843+ x_fra = x_fra << lz;
844+ UTy fra_bits_quo =
845+ fra_uint_div (x_fra, y_fra, std::numeric_limits<Ty>::digits - lzs + 2 );
846+ z_fra = fra_bits_quo >> 3 ;
847+ int grs_bits = fra_bits_quo & 0x7 ;
848+ int rb = __handling_rounding (z_sig, z_fra, grs_bits, rd);
849+ if (rb != 0x0 ) {
850+ z_fra++;
851+ if (z_fra > __iml_fp_config<Ty>::fra_mask) {
852+ z_exp++;
853+ z_fra = 0x0 ;
854+ }
855+ }
856+ return __builtin_bit_cast (
857+ Ty, (z_sig << (sizeof (Ty) * 8 - 1 )) |
858+ (z_exp << (std::numeric_limits<Ty>::digits - 1 )) | z_fra);
859+ }
860+ }
861+ }
630862#endif
0 commit comments