sys/lib/libkern/softfloat.c - annotate

Return to softfloat.c CVS log
Up to [local] / sys / lib / libkern
Annotation of sys/lib/libkern/softfloat.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: softfloat.c,v 1.1 2002/04/28 20:55:14 pvalchev Exp $  */
        !             2: /*     $NetBSD: softfloat.c,v 1.1 2001/04/26 03:10:47 ross Exp $       */
        !             3:
        !             4: /*
        !             5:  * This version hacked for use with gcc -msoft-float by bjh21.
        !             6:  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
        !             7:  *  itself).
        !             8:  */
        !             9:
        !            10: /*
        !            11:  * Things you may want to define:
        !            12:  *
        !            13:  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
        !            14:  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
        !            15:  *   properly renamed.
        !            16:  */
        !            17:
        !            18: /*
        !            19: ===============================================================================
        !            20:
        !            21: This C source file is part of the SoftFloat IEC/IEEE Floating-point
        !            22: Arithmetic Package, Release 2a.
        !            23:
        !            24: Written by John R. Hauser.  This work was made possible in part by the
        !            25: International Computer Science Institute, located at Suite 600, 1947 Center
        !            26: Street, Berkeley, California 94704.  Funding was partially provided by the
        !            27: National Science Foundation under grant MIP-9311980.  The original version
        !            28: of this code was written as part of a project to build a fixed-point vector
        !            29: processor in collaboration with the University of California at Berkeley,
        !            30: overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
        !            31: is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
        !            32: arithmetic/SoftFloat.html'.
        !            33:
        !            34: THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
        !            35: effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
        !            36: WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
        !            37: RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
        !            38: RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
        !            39: THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
        !            40: (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
        !            41: COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
        !            42: ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
        !            43:
        !            44: Derivative works are acceptable, even for commercial purposes, so long as
        !            45: (1) they include prominent notice that the work is derivative, and (2) they
        !            46: include prominent notice akin to these four paragraphs for those parts of
        !            47: this code that are retained.
        !            48:
        !            49: ===============================================================================
        !            50: */
        !            51:
        !            52: #ifndef NO_IEEE
        !            53:
        !            54: #include <sys/cdefs.h>
        !            55: #if defined(LIBC_SCCS) && !defined(lint)
        !            56: __RCSID("$NetBSD: softfloat.c,v 1.1 2001/04/26 03:10:47 ross Exp $");
        !            57: #endif /* LIBC_SCCS and not lint */
        !            58:
        !            59: #ifdef SOFTFLOAT_FOR_GCC
        !            60: #include "softfloat-for-gcc.h"
        !            61: #endif
        !            62:
        !            63: #include "milieu.h"
        !            64: #include "softfloat.h"
        !            65:
        !            66: /*
        !            67:  * Conversions between floats as stored in memory and floats as
        !            68:  * SoftFloat uses them
        !            69:  */
        !            70: #ifndef FLOAT64_DEMANGLE       /* hook: converts a float64 as stored in memory to the form used by this file */
        !            71: #define FLOAT64_DEMANGLE(a)    (a)     /* default when no definition was supplied earlier: identity */
        !            72: #endif
        !            73: #ifndef FLOAT64_MANGLE         /* hook: converts back from the form used by this file to the in-memory form */
        !            74: #define FLOAT64_MANGLE(a)      (a)     /* default when no definition was supplied earlier: identity */
        !            75: #endif
        !            76:
        !            77: /*
        !            78: -------------------------------------------------------------------------------
        !            79: Floating-point rounding mode, extended double-precision rounding precision,
        !            80: and exception flags.
        !            81: -------------------------------------------------------------------------------
        !            82: */
        !            83:
        !            84: /*
        !            85:  * XXX: This may cause options-MULTIPROCESSOR or thread problems someday.
        !            86:  *     Right now, it does not.  I've removed all other dynamic global
        !            87:  *     variables. [ross]
        !            88:  */
        !            89: #ifdef FLOATX80        /* the variable below is only compiled when FLOATX80 is defined */
        !            90: int8 floatx80_rounding_precision = 80; /* extended double-precision rounding precision; mutable global, initial value 80 */
        !            91: #endif
        !            92:
        !            93: /*
        !            94: -------------------------------------------------------------------------------
        !            95: Primitive arithmetic functions, including multi-word arithmetic, and
        !            96: division and square root approximations.  (Can be specialized to target if
        !            97: desired.)
        !            98: -------------------------------------------------------------------------------
        !            99: */
        !           100: #include "softfloat-macros.h"
        !           101:
        !           102: /*
        !           103: -------------------------------------------------------------------------------
        !           104: Functions and definitions to determine:  (1) whether tininess for underflow
        !           105: is detected before or after rounding by default, (2) what (if anything)
        !           106: happens when exceptions are raised, (3) how signaling NaNs are distinguished
        !           107: from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
        !           108: are propagated from function inputs to output.  These details are target-
        !           109: specific.
        !           110: -------------------------------------------------------------------------------
        !           111: */
        !           112: #include "softfloat-specialize.h"
        !           113:
        !           114: #ifndef SOFTFLOAT_FOR_GCC /* Not used */
        !           115: /*
        !           116: -------------------------------------------------------------------------------
        !           117: Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
        !           118: and 7, and returns the properly rounded 32-bit integer corresponding to the
        !           119: input.  If `zSign' is 1, the input is negated before being converted to an
        !           120: integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
        !           121: is simply rounded to an integer, with the inexact exception raised if the
        !           122: input cannot be represented exactly as an integer.  However, if the fixed-
        !           123: point input is too large, the invalid exception is raised and the largest
        !           124: positive or negative integer is returned.
        !           125: -------------------------------------------------------------------------------
        !           126: */
        !           127: static int32 roundAndPackInt32( flag zSign, bits64 absZ )      /* rounds fixed-point absZ (low 7 bits are fraction) to an int32, negated when zSign is set */
        !           128: {
        !           129:     int8 roundingMode;
        !           130:     flag roundNearestEven;
        !           131:     int8 roundIncrement, roundBits;
        !           132:     int32 z;
        !           133:
        !           134:     roundingMode = float_rounding_mode();
        !           135:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !           136:     roundIncrement = 0x40;     /* half of one integer unit (0x80): the default, used for round-to-nearest */
        !           137:     if ( ! roundNearestEven ) {
        !           138:         if ( roundingMode == float_round_to_zero ) {
        !           139:             roundIncrement = 0;        /* truncate the magnitude */
        !           140:         }
        !           141:         else {
        !           142:             roundIncrement = 0x7F;     /* directed rounding: any nonzero fraction carries into the integer part */
        !           143:             if ( zSign ) {
        !           144:                 if ( roundingMode == float_round_up ) roundIncrement = 0;      /* negative value rounded up: magnitude is truncated */
        !           145:             }
        !           146:             else {
        !           147:                 if ( roundingMode == float_round_down ) roundIncrement = 0;    /* positive value rounded down: magnitude is truncated */
        !           148:             }
        !           149:         }
        !           150:     }
        !           151:     roundBits = absZ & 0x7F;   /* the 7 fraction bits that are about to be discarded */
        !           152:     absZ = ( absZ + roundIncrement )>>7;       /* apply the increment, then drop the fraction */
        !           153:     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );    /* exact tie (fraction == 0x40) in nearest-even mode: clear bit 0 so the result is even */
        !           154:     z = absZ;  /* narrowing conversion; range is validated by the check below */
        !           155:     if ( zSign ) z = - z;
        !           156:     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {    /* magnitude has bits above bit 31, or nonzero z ended up with the wrong sign */
        !           157:         float_raise( float_flag_invalid );
        !           158:         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;      /* saturate toward the requested sign */
        !           159:     }
        !           160:     if ( roundBits ) float_set_inexact();      /* nonzero fraction was discarded */
        !           161:     return z;
        !           162:
        !           163: }
        !           164:
        !           165: /*
        !           166: -------------------------------------------------------------------------------
        !           167: Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
        !           168: `absZ1', with binary point between bits 63 and 64 (between the input words),
        !           169: and returns the properly rounded 64-bit integer corresponding to the input.
        !           170: If `zSign' is 1, the input is negated before being converted to an integer.
        !           171: Ordinarily, the fixed-point input is simply rounded to an integer, with
        !           172: the inexact exception raised if the input cannot be represented exactly as
        !           173: an integer.  However, if the fixed-point input is too large, the invalid
        !           174: exception is raised and the largest positive or negative integer is
        !           175: returned.
        !           176: -------------------------------------------------------------------------------
        !           177: */
        !           178: static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )       /* rounds absZ0 (integer part) plus absZ1 (fraction) to an int64, negated when zSign is set */
        !           179: {
        !           180:     int8 roundingMode;
        !           181:     flag roundNearestEven, increment;
        !           182:     int64 z;
        !           183:
        !           184:     roundingMode = float_rounding_mode();
        !           185:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !           186:     increment = ( (sbits64) absZ1 < 0 );       /* top bit of the fraction word set, i.e. fraction is at least one half */
        !           187:     if ( ! roundNearestEven ) {
        !           188:         if ( roundingMode == float_round_to_zero ) {
        !           189:             increment = 0;     /* truncate the magnitude */
        !           190:         }
        !           191:         else {
        !           192:             if ( zSign ) {
        !           193:                 increment = ( roundingMode == float_round_down ) && absZ1;     /* negative value rounded down: grow magnitude if any fraction bit is set */
        !           194:             }
        !           195:             else {
        !           196:                 increment = ( roundingMode == float_round_up ) && absZ1;       /* positive value rounded up: grow magnitude if any fraction bit is set */
        !           197:             }
        !           198:         }
        !           199:     }
        !           200:     if ( increment ) {
        !           201:         ++absZ0;
        !           202:         if ( absZ0 == 0 ) goto overflow;       /* the increment wrapped the 64-bit integer part */
        !           203:         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );      /* nearest-even tie (only the top fraction bit was set): clear bit 0 so the result is even */
        !           204:     }
        !           205:     z = absZ0;
        !           206:     if ( zSign ) z = - z;
        !           207:     if ( z && ( ( z < 0 ) ^ zSign ) ) {        /* nonzero z whose sign disagrees with zSign: value does not fit */
        !           208:  overflow:
        !           209:         float_raise( float_flag_invalid );
        !           210:         return
        !           211:               zSign ? (sbits64) LIT64( 0x8000000000000000 )    /* saturate toward the requested sign */
        !           212:             : LIT64( 0x7FFFFFFFFFFFFFFF );
        !           213:     }
        !           214:     if ( absZ1 ) float_set_inexact();  /* nonzero fraction was discarded */
        !           215:     return z;
        !           216:
        !           217: }
        !           218: #endif
        !           219:
        !           220: /*
        !           221: -------------------------------------------------------------------------------
        !           222: Returns the fraction bits of the single-precision floating-point value `a'.
        !           223: -------------------------------------------------------------------------------
        !           224: */
        !           225: INLINE bits32 extractFloat32Frac( float32 a )  /* returns the fraction field of a */
        !           226: {
        !           227:
        !           228:     return a & 0x007FFFFF;     /* keep the low 23 bits */
        !           229:
        !           230: }
        !           231:
        !           232: /*
        !           233: -------------------------------------------------------------------------------
        !           234: Returns the exponent bits of the single-precision floating-point value `a'.
        !           235: -------------------------------------------------------------------------------
        !           236: */
        !           237: INLINE int16 extractFloat32Exp( float32 a )    /* returns the raw (still biased) exponent field of a */
        !           238: {
        !           239:
        !           240:     return ( a>>23 ) & 0xFF;   /* bits 23..30, 8 bits wide */
        !           241:
        !           242: }
        !           243:
        !           244: /*
        !           245: -------------------------------------------------------------------------------
        !           246: Returns the sign bit of the single-precision floating-point value `a'.
        !           247: -------------------------------------------------------------------------------
        !           248: */
        !           249: INLINE flag extractFloat32Sign( float32 a )    /* returns the sign bit of a */
        !           250: {
        !           251:
        !           252:     return a>>31;      /* bit 31; NOTE(review): yields exactly 0 or 1 only if float32 is an unsigned 32-bit type - confirm in softfloat.h */
        !           253:
        !           254: }
        !           255:
        !           256: /*
        !           257: -------------------------------------------------------------------------------
        !           258: Normalizes the subnormal single-precision floating-point value represented
        !           259: by the denormalized significand `aSig'.  The normalized exponent and
        !           260: significand are stored at the locations pointed to by `zExpPtr' and
        !           261: `zSigPtr', respectively.
        !           262: -------------------------------------------------------------------------------
        !           263: */
        !           264: static void    /* outputs are written through zExpPtr and zSigPtr; nothing is returned */
        !           265:  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
        !           266: {
        !           267:     int8 shiftCount;
        !           268:
        !           269:     shiftCount = countLeadingZeros32( aSig ) - 8;      /* left shift that puts the leading 1 of aSig at bit 23 (assumes the helper returns the leading-zero count) */
        !           270:     *zSigPtr = aSig<<shiftCount;
        !           271:     *zExpPtr = 1 - shiftCount; /* exponent lowered by one for every position shifted, starting from 1 */
        !           272:
        !           273: }
        !           274:
        !           275: /*
        !           276: -------------------------------------------------------------------------------
        !           277: Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
        !           278: single-precision floating-point value, returning the result.  After being
        !           279: shifted into the proper positions, the three fields are simply added
        !           280: together to form the result.  This means that any integer portion of `zSig'
        !           281: will be added into the exponent.  Since a properly normalized significand
        !           282: will have an integer portion equal to 1, the `zExp' input should be 1 less
        !           283: than the desired result exponent whenever `zSig' is a complete, normalized
        !           284: significand.
        !           285: -------------------------------------------------------------------------------
        !           286: */
        !           287: INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )      /* assembles a float32 from sign, exponent and significand */
        !           288: {
        !           289:
        !           290:     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;      /* fields are added, not ORed: bits of zSig at or above bit 23 carry into the exponent field */
        !           291:
        !           292: }
        !           293:
        !           294: /*
        !           295: -------------------------------------------------------------------------------
        !           296: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           297: and significand `zSig', and returns the proper single-precision floating-
        !           298: point value corresponding to the abstract input.  Ordinarily, the abstract
        !           299: value is simply rounded and packed into the single-precision format, with
        !           300: the inexact exception raised if the abstract input cannot be represented
        !           301: exactly.  However, if the abstract value is too large, the overflow and
        !           302: inexact exceptions are raised and an infinity or maximal finite value is
        !           303: returned.  If the abstract value is too small, the input value is rounded to
        !           304: a subnormal number, and the underflow and inexact exceptions are raised if
        !           305: the abstract input cannot be represented exactly as a subnormal single-
        !           306: precision floating-point number.
        !           307:     The input significand `zSig' has its binary point between bits 30
        !           308: and 29, which is 7 bits to the left of the usual location.  This shifted
        !           309: significand must be normalized or smaller.  If `zSig' is not normalized,
        !           310: `zExp' must be 0; in that case, the result returned is a subnormal number,
        !           311: and it must not require rounding.  In the usual case that `zSig' is
        !           312: normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
        !           313: The handling of underflow and overflow follows the IEC/IEEE Standard for
        !           314: Binary Floating-Point Arithmetic.
        !           315: -------------------------------------------------------------------------------
        !           316: */
        !           317: static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )      /* rounds zSig (7 extra low bits) per the current rounding mode and packs the float32 result */
        !           318: {
        !           319:     int8 roundingMode;
        !           320:     flag roundNearestEven;
        !           321:     int8 roundIncrement, roundBits;
        !           322:     flag isTiny;
        !           323:
        !           324:     roundingMode = float_rounding_mode();
        !           325:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !           326:     roundIncrement = 0x40;     /* half of the 7-bit rounding field (0x80): the default, used for round-to-nearest */
        !           327:     if ( ! roundNearestEven ) {
        !           328:         if ( roundingMode == float_round_to_zero ) {
        !           329:             roundIncrement = 0;        /* truncate the magnitude */
        !           330:         }
        !           331:         else {
        !           332:             roundIncrement = 0x7F;     /* directed rounding: any nonzero round bits carry upward */
        !           333:             if ( zSign ) {
        !           334:                 if ( roundingMode == float_round_up ) roundIncrement = 0;      /* negative value rounded up: magnitude is truncated */
        !           335:             }
        !           336:             else {
        !           337:                 if ( roundingMode == float_round_down ) roundIncrement = 0;    /* positive value rounded down: magnitude is truncated */
        !           338:             }
        !           339:         }
        !           340:     }
        !           341:     roundBits = zSig & 0x7F;   /* the 7 low bits that will be rounded away */
        !           342:     if ( 0xFD <= (bits16) zExp ) {     /* cast makes one test cover both zExp >= 0xFD and negative zExp (presumably bits16 is unsigned - confirm) */
        !           343:         if (    ( 0xFD < zExp )        /* exponent beyond the largest finite value */
        !           344:              || (    ( zExp == 0xFD )
        !           345:                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )     /* at the top exponent, rounding carries into bit 31 */
        !           346:            ) {
        !           347:             float_raise( float_flag_overflow | float_flag_inexact );
        !           348:             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );    /* exponent 0xFF with zero fraction, minus one (all-ones fraction under exponent 0xFE) when the magnitude is being truncated */
        !           349:         }
        !           350:         if ( zExp < 0 ) {      /* below the normal range: produce a subnormal */
        !           351:             isTiny =
        !           352:                    ( float_detect_tininess == float_tininess_before_rounding ) /* tininess judged before rounding: always tiny on this path */
        !           353:                 || ( zExp < -1 )       /* otherwise tiny unless zExp is exactly -1 ... */
        !           354:                 || ( zSig + roundIncrement < 0x80000000 );     /* ... and rounding carries into bit 31 */
        !           355:             shift32RightJamming( zSig, - zExp, &zSig );        /* denormalize; presumably keeps a sticky bit for shifted-out ones (helper defined in softfloat-macros.h) */
        !           356:             zExp = 0;
        !           357:             roundBits = zSig & 0x7F;   /* recompute round bits after the shift */
        !           358:             if ( isTiny && roundBits ) float_raise( float_flag_underflow );    /* underflow is raised only when the result is both tiny and inexact */
        !           359:         }
        !           360:     }
        !           361:     if ( roundBits ) float_set_inexact();
        !           362:     zSig = ( zSig + roundIncrement )>>7;       /* apply the increment, then drop the 7 round bits */
        !           363:     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );    /* exact tie in nearest-even mode: clear bit 0 so the result is even */
        !           364:     if ( zSig == 0 ) zExp = 0; /* significand is zero: zero the exponent as well so a signed zero is packed */
        !           365:     return packFloat32( zSign, zExp, zSig );   /* packing adds the fields, so bit 23 of zSig increments the exponent field */
        !           366:
        !           367: }
        !           368:
        !           369: /*
        !           370: -------------------------------------------------------------------------------
        !           371: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           372: and significand `zSig', and returns the proper single-precision floating-
        !           373: point value corresponding to the abstract input.  This routine is just like
        !           374: `roundAndPackFloat32' except that `zSig' does not have to be normalized.
        !           375: Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
        !           376: floating-point exponent.
        !           377: -------------------------------------------------------------------------------
        !           378: */
        !           379: static float32 /* normalizes zSig, then defers to roundAndPackFloat32 */
        !           380:  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
        !           381: {
        !           382:     int8 shiftCount;
        !           383:
        !           384:     shiftCount = countLeadingZeros32( zSig ) - 1;      /* left shift that puts the leading 1 of zSig at bit 30 (assumes the helper returns the leading-zero count) */
        !           385:     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );  /* exponent is reduced by the same amount the significand is shifted */
        !           386:
        !           387: }
        !           388:
        !           389: /*
        !           390: -------------------------------------------------------------------------------
        !           391: Returns the fraction bits of the double-precision floating-point value `a'.
        !           392: -------------------------------------------------------------------------------
        !           393: */
        !           394: INLINE bits64 extractFloat64Frac( float64 a )  /* returns the fraction field of a */
        !           395: {
        !           396:
        !           397:     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );  /* demangle first, then keep the low 52 bits */
        !           398:
        !           399: }
        !           400:
        !           401: /*
        !           402: -------------------------------------------------------------------------------
        !           403: Returns the exponent bits of the double-precision floating-point value `a'.
        !           404: -------------------------------------------------------------------------------
        !           405: */
        !           406: INLINE int16 extractFloat64Exp( float64 a )    /* returns the raw (still biased) exponent field of a */
        !           407: {
        !           408:
        !           409:     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;        /* demangle first, then take bits 52..62, 11 bits wide */
        !           410:
        !           411: }
        !           412:
        !           413: /*
        !           414: -------------------------------------------------------------------------------
        !           415: Returns the sign bit of the double-precision floating-point value `a'.
        !           416: -------------------------------------------------------------------------------
        !           417: */
        !           418: INLINE flag extractFloat64Sign( float64 a )    /* returns the sign bit of a */
        !           419: {
        !           420:
        !           421:     return FLOAT64_DEMANGLE(a)>>63;    /* bit 63 of the demangled value; NOTE(review): yields exactly 0 or 1 only if float64 is an unsigned 64-bit type - confirm in softfloat.h */
        !           422:
        !           423: }
        !           424:
        !           425: /*
        !           426: -------------------------------------------------------------------------------
        !           427: Normalizes the subnormal double-precision floating-point value represented
        !           428: by the denormalized significand `aSig'.  The normalized exponent and
        !           429: significand are stored at the locations pointed to by `zExpPtr' and
        !           430: `zSigPtr', respectively.
        !           431: -------------------------------------------------------------------------------
        !           432: */
        !           433: static void    /* outputs are written through zExpPtr and zSigPtr; nothing is returned */
        !           434:  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
        !           435: {
        !           436:     int8 shiftCount;
        !           437:
        !           438:     shiftCount = countLeadingZeros64( aSig ) - 11;     /* left shift that puts the leading 1 of aSig at bit 52 (assumes the helper returns the leading-zero count) */
        !           439:     *zSigPtr = aSig<<shiftCount;
        !           440:     *zExpPtr = 1 - shiftCount; /* exponent lowered by one for every position shifted, starting from 1 */
        !           441:
        !           442: }
        !           443:
        !           444: /*
        !           445: -------------------------------------------------------------------------------
        !           446: Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
        !           447: double-precision floating-point value, returning the result.  After being
        !           448: shifted into the proper positions, the three fields are simply added
        !           449: together to form the result.  This means that any integer portion of `zSig'
        !           450: will be added into the exponent.  Since a properly normalized significand
        !           451: will have an integer portion equal to 1, the `zExp' input should be 1 less
        !           452: than the desired result exponent whenever `zSig' is a complete, normalized
        !           453: significand.
        !           454: -------------------------------------------------------------------------------
        !           455: */
        !           456: INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )      /* assembles a float64 from sign, exponent and significand */
        !           457: {
        !           458:
        !           459:     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +        /* fields are added, not ORed: bits of zSig at or above bit 52 carry into the exponent field */
        !           460:                           ( ( (bits64) zExp )<<52 ) + zSig );  /* the sum is passed through FLOAT64_MANGLE before being returned */
        !           461:
        !           462: }
        !           463:
        !           464: /*
        !           465: -------------------------------------------------------------------------------
        !           466: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           467: and significand `zSig', and returns the proper double-precision floating-
        !           468: point value corresponding to the abstract input.  Ordinarily, the abstract
        !           469: value is simply rounded and packed into the double-precision format, with
        !           470: the inexact exception raised if the abstract input cannot be represented
        !           471: exactly.  However, if the abstract value is too large, the overflow and
        !           472: inexact exceptions are raised and an infinity or maximal finite value is
        !           473: returned.  If the abstract value is too small, the input value is rounded to
        !           474: a subnormal number, and the underflow and inexact exceptions are raised if
        !           475: the abstract input cannot be represented exactly as a subnormal double-
        !           476: precision floating-point number.
        !           477:     The input significand `zSig' has its binary point between bits 62
        !           478: and 61, which is 10 bits to the left of the usual location.  This shifted
        !           479: significand must be normalized or smaller.  If `zSig' is not normalized,
        !           480: `zExp' must be 0; in that case, the result returned is a subnormal number,
        !           481: and it must not require rounding.  In the usual case that `zSig' is
        !           482: normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
        !           483: The handling of underflow and overflow follows the IEC/IEEE Standard for
        !           484: Binary Floating-Point Arithmetic.
        !           485: -------------------------------------------------------------------------------
        !           486: */
        !           487: static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )      /* rounds zSig (10 extra low bits) per the current rounding mode and packs the float64 result */
        !           488: {
        !           489:     int8 roundingMode;
        !           490:     flag roundNearestEven;
        !           491:     int16 roundIncrement, roundBits;
        !           492:     flag isTiny;
        !           493:
        !           494:     roundingMode = float_rounding_mode();
        !           495:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !           496:     roundIncrement = 0x200;    /* half of the 10-bit rounding field (0x400): the default, used for round-to-nearest */
        !           497:     if ( ! roundNearestEven ) {
        !           498:         if ( roundingMode == float_round_to_zero ) {
        !           499:             roundIncrement = 0;        /* truncate the magnitude */
        !           500:         }
        !           501:         else {
        !           502:             roundIncrement = 0x3FF;    /* directed rounding: any nonzero round bits carry upward */
        !           503:             if ( zSign ) {
        !           504:                 if ( roundingMode == float_round_up ) roundIncrement = 0;      /* negative value rounded up: magnitude is truncated */
        !           505:             }
        !           506:             else {
        !           507:                 if ( roundingMode == float_round_down ) roundIncrement = 0;    /* positive value rounded down: magnitude is truncated */
        !           508:             }
        !           509:         }
        !           510:     }
        !           511:     roundBits = zSig & 0x3FF;  /* the 10 low bits that will be rounded away */
        !           512:     if ( 0x7FD <= (bits16) zExp ) {    /* cast makes one test cover both zExp >= 0x7FD and negative zExp (presumably bits16 is unsigned - confirm) */
        !           513:         if (    ( 0x7FD < zExp )       /* exponent beyond the largest finite value */
        !           514:              || (    ( zExp == 0x7FD )
        !           515:                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )     /* at the top exponent, rounding carries into bit 63 */
        !           516:            ) {
        !           517:             float_raise( float_flag_overflow | float_flag_inexact );
        !           518:             return FLOAT64_MANGLE(     /* the adjustment is done on the demangled value, then re-mangled */
        !           519:                FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -      /* exponent 0x7FF with zero fraction ... */
        !           520:                ( roundIncrement == 0 ));       /* ... minus one (all-ones fraction under exponent 0x7FE) when the magnitude is being truncated */
        !           521:         }
        !           522:         if ( zExp < 0 ) {      /* below the normal range: produce a subnormal */
        !           523:             isTiny =
        !           524:                    ( float_detect_tininess == float_tininess_before_rounding ) /* tininess judged before rounding: always tiny on this path */
        !           525:                 || ( zExp < -1 )       /* otherwise tiny unless zExp is exactly -1 ... */
        !           526:                 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );    /* ... and rounding carries into bit 63 */
        !           527:             shift64RightJamming( zSig, - zExp, &zSig );        /* denormalize; presumably keeps a sticky bit for shifted-out ones (helper defined in softfloat-macros.h) */
        !           528:             zExp = 0;
        !           529:             roundBits = zSig & 0x3FF;  /* recompute round bits after the shift */
        !           530:             if ( isTiny && roundBits ) float_raise( float_flag_underflow );    /* underflow is raised only when the result is both tiny and inexact */
        !           531:         }
        !           532:     }
        !           533:     if ( roundBits ) float_set_inexact();
        !           534:     zSig = ( zSig + roundIncrement )>>10;      /* apply the increment, then drop the 10 round bits */
        !           535:     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );   /* exact tie in nearest-even mode: clear bit 0 so the result is even */
        !           536:     if ( zSig == 0 ) zExp = 0; /* significand is zero: zero the exponent as well so a signed zero is packed */
        !           537:     return packFloat64( zSign, zExp, zSig );   /* packing adds the fields, so bit 52 of zSig increments the exponent field */
        !           538:
        !           539: }
        !           540:
        !           541: /*
        !           542: -------------------------------------------------------------------------------
        !           543: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           544: and significand `zSig', and returns the proper double-precision floating-
        !           545: point value corresponding to the abstract input.  This routine is just like
        !           546: `roundAndPackFloat64' except that `zSig' does not have to be normalized.
        !           547: Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
        !           548: floating-point exponent.
        !           549: -------------------------------------------------------------------------------
        !           550: */
        !           551: static float64
        !           552:  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
        !           553: {   /* Left-justifies zSig below bit 63, then defers to roundAndPackFloat64. */
        !           554:     int8 shiftCount;
        !           555:
        !           556:     shiftCount = countLeadingZeros64( zSig ) - 1;  /* one less than the leading-zero count, so bit 63 stays clear */
        !           557:     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );  /* exponent drops by the same shift */
        !           558:
        !           559: }
        !           560:
        !           561: #ifdef FLOATX80
        !           562:
        !           563: /*
        !           564: -------------------------------------------------------------------------------
        !           565: Returns the fraction bits of the extended double-precision floating-point
        !           566: value `a'.
        !           567: -------------------------------------------------------------------------------
        !           568: */
        !           569: INLINE bits64 extractFloatx80Frac( floatx80 a )
        !           570: {
        !           571:
        !           572:     return a.low;  /* the whole `low' member is returned unmodified */
        !           573:
        !           574: }
        !           575:
        !           576: /*
        !           577: -------------------------------------------------------------------------------
        !           578: Returns the exponent bits of the extended double-precision floating-point
        !           579: value `a'.
        !           580: -------------------------------------------------------------------------------
        !           581: */
        !           582: INLINE int32 extractFloatx80Exp( floatx80 a )
        !           583: {
        !           584:
        !           585:     return a.high & 0x7FFF;  /* keep the low 15 bits of `high'; bit 15 is read by extractFloatx80Sign */
        !           586:
        !           587: }
        !           588:
        !           589: /*
        !           590: -------------------------------------------------------------------------------
        !           591: Returns the sign bit of the extended double-precision floating-point value
        !           592: `a'.
        !           593: -------------------------------------------------------------------------------
        !           594: */
        !           595: INLINE flag extractFloatx80Sign( floatx80 a )
        !           596: {
        !           597:
        !           598:     return a.high>>15;  /* shift out the 15 bits masked by extractFloatx80Exp, leaving bit 15 and up */
        !           599:
        !           600: }
        !           601:
        !           602: /*
        !           603: -------------------------------------------------------------------------------
        !           604: Normalizes the subnormal extended double-precision floating-point value
        !           605: represented by the denormalized significand `aSig'.  The normalized exponent
        !           606: and significand are stored at the locations pointed to by `zExpPtr' and
        !           607: `zSigPtr', respectively.
        !           608: -------------------------------------------------------------------------------
        !           609: */
        !           610: static void
        !           611:  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
        !           612: {   /* Writes both outputs through the pointers; nothing is returned. */
        !           613:     int8 shiftCount;
        !           614:
        !           615:     shiftCount = countLeadingZeros64( aSig );
        !           616:     *zSigPtr = aSig<<shiftCount;  /* move the highest set bit up to bit 63 */
        !           617:     *zExpPtr = 1 - shiftCount;  /* exponent 1 lowered by the number of positions shifted */
        !           618:
        !           619: }
        !           620:
        !           621: /*
        !           622: -------------------------------------------------------------------------------
        !           623: Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
        !           624: extended double-precision floating-point value, returning the result.
        !           625: -------------------------------------------------------------------------------
        !           626: */
        !           627: INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
        !           628: {
        !           629:     floatx80 z;
        !           630:
        !           631:     z.low = zSig;  /* significand is stored as-is in the `low' member */
        !           632:     z.high = ( ( (bits16) zSign )<<15 ) + zExp;  /* sign goes to bit 15; zExp is added (not OR-ed) below it */
        !           633:     return z;
        !           634:
        !           635: }
        !           636:
        !           637: /*
        !           638: -------------------------------------------------------------------------------
        !           639: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           640: and extended significand formed by the concatenation of `zSig0' and `zSig1',
        !           641: and returns the proper extended double-precision floating-point value
        !           642: corresponding to the abstract input.  Ordinarily, the abstract value is
        !           643: rounded and packed into the extended double-precision format, with the
        !           644: inexact exception raised if the abstract input cannot be represented
        !           645: exactly.  However, if the abstract value is too large, the overflow and
        !           646: inexact exceptions are raised and an infinity or maximal finite value is
        !           647: returned.  If the abstract value is too small, the input value is rounded to
        !           648: a subnormal number, and the underflow and inexact exceptions are raised if
        !           649: the abstract input cannot be represented exactly as a subnormal extended
        !           650: double-precision floating-point number.
        !           651:     If `roundingPrecision' is 32 or 64, the result is rounded to the same
        !           652: number of bits as single or double precision, respectively.  Otherwise, the
        !           653: result is rounded to the full precision of the extended double-precision
        !           654: format.
        !           655:     The input significand must be normalized or smaller.  If the input
        !           656: significand is not normalized, `zExp' must be 0; in that case, the result
        !           657: returned is a subnormal number, and it must not require rounding.  The
        !           658: handling of underflow and overflow follows the IEC/IEEE Standard for Binary
        !           659: Floating-Point Arithmetic.
        !           660: -------------------------------------------------------------------------------
        !           661: */
        !           662: static floatx80
        !           663:  roundAndPackFloatx80(
        !           664:      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
        !           665:  )
        !           666: {   /* Two code paths: reduced precision (32/64) rounds inside zSig0; `precision80' rounds on zSig1. */
        !           667:     int8 roundingMode;
        !           668:     flag roundNearestEven, increment, isTiny;
        !           669:     int64 roundIncrement, roundMask, roundBits;
        !           670:
        !           671:     roundingMode = float_rounding_mode();
        !           672:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !           673:     if ( roundingPrecision == 80 ) goto precision80;
        !           674:     if ( roundingPrecision == 64 ) {
        !           675:         roundIncrement = LIT64( 0x0000000000000400 );  /* half of the 11 discarded low bits */
        !           676:         roundMask = LIT64( 0x00000000000007FF );  /* low 11 bits of zSig0 are rounded away */
        !           677:     }
        !           678:     else if ( roundingPrecision == 32 ) {
        !           679:         roundIncrement = LIT64( 0x0000008000000000 );  /* half of the 40 discarded low bits */
        !           680:         roundMask = LIT64( 0x000000FFFFFFFFFF );  /* low 40 bits of zSig0 are rounded away */
        !           681:     }
        !           682:     else {
        !           683:         goto precision80;  /* any other precision value is treated as full precision */
        !           684:     }
        !           685:     zSig0 |= ( zSig1 != 0 );  /* fold all of zSig1 into the lowest bit of zSig0 as a sticky bit */
        !           686:     if ( ! roundNearestEven ) {
        !           687:         if ( roundingMode == float_round_to_zero ) {
        !           688:             roundIncrement = 0;
        !           689:         }
        !           690:         else {
        !           691:             roundIncrement = roundMask;  /* directed rounding: any nonzero round bits carry upward */
        !           692:             if ( zSign ) {
        !           693:                 if ( roundingMode == float_round_up ) roundIncrement = 0;
        !           694:             }
        !           695:             else {
        !           696:                 if ( roundingMode == float_round_down ) roundIncrement = 0;
        !           697:             }
        !           698:         }
        !           699:     }
        !           700:     roundBits = zSig0 & roundMask;
        !           701:     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* unsigned compare: true for zExp >= 0x7FFE and for zExp <= 0 */
        !           702:         if (    ( 0x7FFE < zExp )
        !           703:              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )  /* sum wrapped: carry out of 64 bits */
        !           704:            ) {
        !           705:             goto overflow;  /* shared with the precision80 path; roundMask keeps this path's value */
        !           706:         }
        !           707:         if ( zExp <= 0 ) {
        !           708:             isTiny =
        !           709:                    ( float_detect_tininess == float_tininess_before_rounding )
        !           710:                 || ( zExp < 0 )
        !           711:                 || ( zSig0 <= zSig0 + roundIncrement );  /* rounding does not carry out of bit 63 */
        !           712:             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
        !           713:             zExp = 0;
        !           714:             roundBits = zSig0 & roundMask;  /* recompute after the shift */
        !           715:             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
        !           716:             if ( roundBits ) float_set_inexact();
        !           717:             zSig0 += roundIncrement;
        !           718:             if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* rounding set bit 63: exponent becomes 1 */
        !           719:             roundIncrement = roundMask + 1;  /* reused as the value of the lowest kept bit */
        !           720:             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  /* round bits were exactly half: a tie */
        !           721:                 roundMask |= roundIncrement;  /* also clear the lowest kept bit (round to even) */
        !           722:             }
        !           723:             zSig0 &= ~ roundMask;
        !           724:             return packFloatx80( zSign, zExp, zSig0 );
        !           725:         }
        !           726:     }
        !           727:     if ( roundBits ) float_set_inexact();
        !           728:     zSig0 += roundIncrement;
        !           729:     if ( zSig0 < roundIncrement ) {  /* unsigned wrap-around: the addition carried out of bit 63 */
        !           730:         ++zExp;
        !           731:         zSig0 = LIT64( 0x8000000000000000 );
        !           732:     }
        !           733:     roundIncrement = roundMask + 1;  /* reused as the value of the lowest kept bit */
        !           734:     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  /* exact tie */
        !           735:         roundMask |= roundIncrement;  /* also clear the lowest kept bit (round to even) */
        !           736:     }
        !           737:     zSig0 &= ~ roundMask;
        !           738:     if ( zSig0 == 0 ) zExp = 0;
        !           739:     return packFloatx80( zSign, zExp, zSig0 );
        !           740:  precision80:
        !           741:     increment = ( (sbits64) zSig1 < 0 );  /* nearest-even default: top bit of zSig1 set means at least half */
        !           742:     if ( ! roundNearestEven ) {
        !           743:         if ( roundingMode == float_round_to_zero ) {
        !           744:             increment = 0;
        !           745:         }
        !           746:         else {
        !           747:             if ( zSign ) {
        !           748:                 increment = ( roundingMode == float_round_down ) && zSig1;
        !           749:             }
        !           750:             else {
        !           751:                 increment = ( roundingMode == float_round_up ) && zSig1;
        !           752:             }
        !           753:         }
        !           754:     }
        !           755:     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* unsigned compare: true for zExp >= 0x7FFE and for zExp <= 0 */
        !           756:         if (    ( 0x7FFE < zExp )
        !           757:              || (    ( zExp == 0x7FFE )
        !           758:                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
        !           759:                   && increment
        !           760:                 )
        !           761:            ) {
        !           762:             roundMask = 0;  /* full precision: ~roundMask below is all ones */
        !           763:  overflow:
        !           764:             float_raise( float_flag_overflow | float_flag_inexact );
        !           765:             if (    ( roundingMode == float_round_to_zero )
        !           766:                  || ( zSign && ( roundingMode == float_round_up ) )
        !           767:                  || ( ! zSign && ( roundingMode == float_round_down ) )
        !           768:                ) {
        !           769:                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );  /* exponent 0x7FFE, all bits set outside roundMask */
        !           770:             }
        !           771:             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  /* exponent 0x7FFF, only bit 63 set */
        !           772:         }
        !           773:         if ( zExp <= 0 ) {
        !           774:             isTiny =
        !           775:                    ( float_detect_tininess == float_tininess_before_rounding )
        !           776:                 || ( zExp < 0 )
        !           777:                 || ! increment
        !           778:                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );  /* incrementing zSig0 would not wrap */
        !           779:             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
        !           780:             zExp = 0;
        !           781:             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
        !           782:             if ( zSig1 ) float_set_inexact();
        !           783:             if ( roundNearestEven ) {  /* re-derive `increment' from the shifted zSig1 */
        !           784:                 increment = ( (sbits64) zSig1 < 0 );
        !           785:             }
        !           786:             else {
        !           787:                 if ( zSign ) {
        !           788:                     increment = ( roundingMode == float_round_down ) && zSig1;
        !           789:                 }
        !           790:                 else {
        !           791:                     increment = ( roundingMode == float_round_up ) && zSig1;
        !           792:                 }
        !           793:             }
        !           794:             if ( increment ) {
        !           795:                 ++zSig0;
        !           796:                 zSig0 &=
        !           797:                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );  /* no bits below zSig1's top bit: tie, clear low bit */
        !           798:                 if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* rounding set bit 63: exponent becomes 1 */
        !           799:             }
        !           800:             return packFloatx80( zSign, zExp, zSig0 );
        !           801:         }
        !           802:     }
        !           803:     if ( zSig1 ) float_set_inexact();
        !           804:     if ( increment ) {
        !           805:         ++zSig0;
        !           806:         if ( zSig0 == 0 ) {  /* wrapped to zero: carry out of bit 63 */
        !           807:             ++zExp;
        !           808:             zSig0 = LIT64( 0x8000000000000000 );
        !           809:         }
        !           810:         else {
        !           811:             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );  /* tie in nearest-even: clear low bit */
        !           812:         }
        !           813:     }
        !           814:     else {
        !           815:         if ( zSig0 == 0 ) zExp = 0;  /* zero significand is packed with a zero exponent */
        !           816:     }
        !           817:     return packFloatx80( zSign, zExp, zSig0 );
        !           818:
        !           819: }
        !           820:
        !           821: /*
        !           822: -------------------------------------------------------------------------------
        !           823: Takes an abstract floating-point value having sign `zSign', exponent
        !           824: `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
        !           825: and returns the proper extended double-precision floating-point value
        !           826: corresponding to the abstract input.  This routine is just like
        !           827: `roundAndPackFloatx80' except that the input significand does not have to be
        !           828: normalized.
        !           829: -------------------------------------------------------------------------------
        !           830: */
        !           831: static floatx80
        !           832:  normalizeRoundAndPackFloatx80(
        !           833:      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
        !           834:  )
        !           835: {   /* Shifts the 128-bit significand left until bit 63 of zSig0 is set, then rounds and packs. */
        !           836:     int8 shiftCount;
        !           837:
        !           838:     if ( zSig0 == 0 ) {  /* high word empty: promote the low word, i.e. a 64-bit left shift */
        !           839:         zSig0 = zSig1;
        !           840:         zSig1 = 0;
        !           841:         zExp -= 64;
        !           842:     }
        !           843:     shiftCount = countLeadingZeros64( zSig0 );  /* NOTE(review): zSig0 is still 0 here if both inputs were 0 - confirm callers exclude that */
        !           844:     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
        !           845:     zExp -= shiftCount;  /* exponent drops by the number of positions shifted */
        !           846:     return
        !           847:         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
        !           848:
        !           849: }
        !           850:
        !           851: #endif
        !           852:
        !           853: #ifdef FLOAT128
        !           854:
        !           855: /*
        !           856: -------------------------------------------------------------------------------
        !           857: Returns the least-significant 64 fraction bits of the quadruple-precision
        !           858: floating-point value `a'.
        !           859: -------------------------------------------------------------------------------
        !           860: */
        !           861: INLINE bits64 extractFloat128Frac1( float128 a )
        !           862: {
        !           863:
        !           864:     return a.low;  /* the whole `low' member is returned unmodified */
        !           865:
        !           866: }
        !           867:
        !           868: /*
        !           869: -------------------------------------------------------------------------------
        !           870: Returns the most-significant 48 fraction bits of the quadruple-precision
        !           871: floating-point value `a'.
        !           872: -------------------------------------------------------------------------------
        !           873: */
        !           874: INLINE bits64 extractFloat128Frac0( float128 a )
        !           875: {
        !           876:
        !           877:     return a.high & LIT64( 0x0000FFFFFFFFFFFF );  /* keep bits 0..47 of `high'; bits 48 and up are masked off */
        !           878:
        !           879: }
        !           880:
        !           881: /*
        !           882: -------------------------------------------------------------------------------
        !           883: Returns the exponent bits of the quadruple-precision floating-point value
        !           884: `a'.
        !           885: -------------------------------------------------------------------------------
        !           886: */
        !           887: INLINE int32 extractFloat128Exp( float128 a )
        !           888: {
        !           889:
        !           890:     return ( a.high>>48 ) & 0x7FFF;  /* 15 bits starting at bit 48 of `high' */
        !           891:
        !           892: }
        !           893:
        !           894: /*
        !           895: -------------------------------------------------------------------------------
        !           896: Returns the sign bit of the quadruple-precision floating-point value `a'.
        !           897: -------------------------------------------------------------------------------
        !           898: */
        !           899: INLINE flag extractFloat128Sign( float128 a )
        !           900: {
        !           901:
        !           902:     return a.high>>63;  /* top bit of `high' */
        !           903:
        !           904: }
        !           905:
        !           906: /*
        !           907: -------------------------------------------------------------------------------
        !           908: Normalizes the subnormal quadruple-precision floating-point value
        !           909: represented by the denormalized significand formed by the concatenation of
        !           910: `aSig0' and `aSig1'.  The normalized exponent is stored at the location
        !           911: pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
        !           912: significand are stored at the location pointed to by `zSig0Ptr', and the
        !           913: least significant 64 bits of the normalized significand are stored at the
        !           914: location pointed to by `zSig1Ptr'.
        !           915: -------------------------------------------------------------------------------
        !           916: */
        !           917: static void
        !           918:  normalizeFloat128Subnormal(
        !           919:      bits64 aSig0,
        !           920:      bits64 aSig1,
        !           921:      int32 *zExpPtr,
        !           922:      bits64 *zSig0Ptr,
        !           923:      bits64 *zSig1Ptr
        !           924:  )
        !           925: {   /* Results are written through the three pointers; the leading 1 ends up at bit 48 of *zSig0Ptr. */
        !           926:     int8 shiftCount;
        !           927:
        !           928:     if ( aSig0 == 0 ) {  /* all significant bits are in the low word */
        !           929:         shiftCount = countLeadingZeros64( aSig1 ) - 15;  /* 15 leading zeros puts the top bit at bit 48 */
        !           930:         if ( shiftCount < 0 ) {  /* fewer than 15 leading zeros: the value straddles both output words */
        !           931:             *zSig0Ptr = aSig1>>( - shiftCount );  /* upper part, shifted down into the high word */
        !           932:             *zSig1Ptr = aSig1<<( shiftCount & 63 );  /* for negative shiftCount, & 63 equals 64 + shiftCount */
        !           933:         }
        !           934:         else {
        !           935:             *zSig0Ptr = aSig1<<shiftCount;  /* everything fits in the high word */
        !           936:             *zSig1Ptr = 0;
        !           937:         }
        !           938:         *zExpPtr = - shiftCount - 63;  /* equals 1 - ( shiftCount + 64 ): accounts for the word promotion */
        !           939:     }
        !           940:     else {
        !           941:         shiftCount = countLeadingZeros64( aSig0 ) - 15;  /* 15 leading zeros puts the top bit at bit 48 */
        !           942:         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        !           943:         *zExpPtr = 1 - shiftCount;
        !           944:     }
        !           945:
        !           946: }
        !           947:
        !           948: /*
        !           949: -------------------------------------------------------------------------------
        !           950: Packs the sign `zSign', the exponent `zExp', and the significand formed
        !           951: by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
        !           952: floating-point value, returning the result.  After being shifted into the
        !           953: proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
        !           954: added together to form the most significant 64 bits of the result.  This
        !           955: means that any integer portion of `zSig0' will be added into the exponent.
        !           956: Since a properly normalized significand will have an integer portion equal
        !           957: to 1, the `zExp' input should be 1 less than the desired result exponent
        !           958: whenever `zSig0' and `zSig1' concatenated form a complete, normalized
        !           959: significand.
        !           960: -------------------------------------------------------------------------------
        !           961: */
        !           962: INLINE float128
        !           963:  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
        !           964: {
        !           965:     float128 z;
        !           966:
        !           967:     z.low = zSig1;  /* low significand word is stored as-is */
        !           968:     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;  /* added, not OR-ed: bits of zSig0 at 48 and above carry into the exponent */
        !           969:     return z;
        !           970:
        !           971: }
        !           972:
        !           973: /*
        !           974: -------------------------------------------------------------------------------
        !           975: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !           976: and extended significand formed by the concatenation of `zSig0', `zSig1',
        !           977: and `zSig2', and returns the proper quadruple-precision floating-point value
        !           978: corresponding to the abstract input.  Ordinarily, the abstract value is
        !           979: simply rounded and packed into the quadruple-precision format, with the
        !           980: inexact exception raised if the abstract input cannot be represented
        !           981: exactly.  However, if the abstract value is too large, the overflow and
        !           982: inexact exceptions are raised and an infinity or maximal finite value is
        !           983: returned.  If the abstract value is too small, the input value is rounded to
        !           984: a subnormal number, and the underflow and inexact exceptions are raised if
        !           985: the abstract input cannot be represented exactly as a subnormal quadruple-
        !           986: precision floating-point number.
        !           987:     The input significand must be normalized or smaller.  If the input
        !           988: significand is not normalized, `zExp' must be 0; in that case, the result
        !           989: returned is a subnormal number, and it must not require rounding.  In the
        !           990: usual case that the input significand is normalized, `zExp' must be 1 less
        !           991: than the ``true'' floating-point exponent.  The handling of underflow and
        !           992: overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !           993: -------------------------------------------------------------------------------
        !           994: */
        !           995: static float128
        !           996:  roundAndPackFloat128(
        !           997:      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
        !           998: {
        !           999:     int8 roundingMode;
        !          1000:     flag roundNearestEven, increment, isTiny;
        !          1001:
        !          1002:     roundingMode = float_rounding_mode();
        !          1003:     roundNearestEven = ( roundingMode == float_round_nearest_even );
        !          1004:     increment = ( (sbits64) zSig2 < 0 );
        !          1005:     if ( ! roundNearestEven ) {
        !          1006:         if ( roundingMode == float_round_to_zero ) {
        !          1007:             increment = 0;
        !          1008:         }
        !          1009:         else {
        !          1010:             if ( zSign ) {
        !          1011:                 increment = ( roundingMode == float_round_down ) && zSig2;
        !          1012:             }
        !          1013:             else {
        !          1014:                 increment = ( roundingMode == float_round_up ) && zSig2;
        !          1015:             }
        !          1016:         }
        !          1017:     }
        !          1018:     if ( 0x7FFD <= (bits32) zExp ) {
        !          1019:         if (    ( 0x7FFD < zExp )
        !          1020:              || (    ( zExp == 0x7FFD )
        !          1021:                   && eq128(
        !          1022:                          LIT64( 0x0001FFFFFFFFFFFF ),
        !          1023:                          LIT64( 0xFFFFFFFFFFFFFFFF ),
        !          1024:                          zSig0,
        !          1025:                          zSig1
        !          1026:                      )
        !          1027:                   && increment
        !          1028:                 )
        !          1029:            ) {
        !          1030:             float_raise( float_flag_overflow | float_flag_inexact );
        !          1031:             if (    ( roundingMode == float_round_to_zero )
        !          1032:                  || ( zSign && ( roundingMode == float_round_up ) )
        !          1033:                  || ( ! zSign && ( roundingMode == float_round_down ) )
        !          1034:                ) {
        !          1035:                 return
        !          1036:                     packFloat128(
        !          1037:                         zSign,
        !          1038:                         0x7FFE,
        !          1039:                         LIT64( 0x0000FFFFFFFFFFFF ),
        !          1040:                         LIT64( 0xFFFFFFFFFFFFFFFF )
        !          1041:                     );
        !          1042:             }
        !          1043:             return packFloat128( zSign, 0x7FFF, 0, 0 );
        !          1044:         }
        !          1045:         if ( zExp < 0 ) {
        !          1046:             isTiny =
        !          1047:                    ( float_detect_tininess == float_tininess_before_rounding )
        !          1048:                 || ( zExp < -1 )
        !          1049:                 || ! increment
        !          1050:                 || lt128(
        !          1051:                        zSig0,
        !          1052:                        zSig1,
        !          1053:                        LIT64( 0x0001FFFFFFFFFFFF ),
        !          1054:                        LIT64( 0xFFFFFFFFFFFFFFFF )
        !          1055:                    );
        !          1056:             shift128ExtraRightJamming(
        !          1057:                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
        !          1058:             zExp = 0;
        !          1059:             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
        !          1060:             if ( roundNearestEven ) {
        !          1061:                 increment = ( (sbits64) zSig2 < 0 );
        !          1062:             }
        !          1063:             else {
        !          1064:                 if ( zSign ) {
        !          1065:                     increment = ( roundingMode == float_round_down ) && zSig2;
        !          1066:                 }
        !          1067:                 else {
        !          1068:                     increment = ( roundingMode == float_round_up ) && zSig2;
        !          1069:                 }
        !          1070:             }
        !          1071:         }
        !          1072:     }
        !          1073:     if ( zSig2 ) float_set_inexact();
        !          1074:     if ( increment ) {
        !          1075:         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        !          1076:         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
        !          1077:     }
        !          1078:     else {
        !          1079:         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
        !          1080:     }
        !          1081:     return packFloat128( zSign, zExp, zSig0, zSig1 );
        !          1082:
        !          1083: }
        !          1084:
        !          1085: /*
        !          1086: -------------------------------------------------------------------------------
        !          1087: Takes an abstract floating-point value having sign `zSign', exponent `zExp',
        !          1088: and significand formed by the concatenation of `zSig0' and `zSig1', and
        !          1089: returns the proper quadruple-precision floating-point value corresponding
        !          1090: to the abstract input.  This routine is just like `roundAndPackFloat128'
        !          1091: except that the input significand has fewer bits and does not have to be
        !          1092: normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
        !          1093: point exponent.
        !          1094: -------------------------------------------------------------------------------
        !          1095: */
        !          1096: static float128
        !          1097:  normalizeRoundAndPackFloat128(
        !          1098:      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
        !          1099: {   /* Shifts zSig0:zSig1 into position, adjusts zExp to match, then defers to roundAndPackFloat128. */
        !          1100:     int8 shiftCount;   /* left-shift distance; a negative value means "shift right" */
        !          1101:     bits64 zSig2;      /* third significand word handed to roundAndPackFloat128 */
        !          1102:
        !          1103:     if ( zSig0 == 0 ) {   /* high word empty: promote the low word by 64 bit positions */
        !          1104:         zSig0 = zSig1;
        !          1105:         zSig1 = 0;
        !          1106:         zExp -= 64;   /* compensate the exponent for the 64-bit move */
        !          1107:     }
        !          1108:     shiftCount = countLeadingZeros64( zSig0 ) - 15;   /* target is exactly 15 leading zero bits in zSig0 */
        !          1109:     if ( 0 <= shiftCount ) {
        !          1110:         zSig2 = 0;   /* a left shift drops nothing, so there are no extra bits */
        !          1111:         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
        !          1112:     }
        !          1113:     else {
        !          1114:         shift128ExtraRightJamming(   /* fewer than 15 leading zeros: shift right; zSig2 receives the third output word (presumably the shifted-out bits) */
        !          1115:             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
        !          1116:     }
        !          1117:     zExp -= shiftCount;   /* exponent follows the shift in either direction */
        !          1118:     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
        !          1119:
        !          1120: }
        !          1121:
        !          1122: #endif
        !          1123:
        !          1124: /*
        !          1125: -------------------------------------------------------------------------------
        !          1126: Returns the result of converting the 32-bit two's complement integer `a'
        !          1127: to the single-precision floating-point format.  The conversion is performed
        !          1128: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1129: -------------------------------------------------------------------------------
        !          1130: */
        !          1131: float32 int32_to_float32( int32 a )
        !          1132: {   /* Zero and the most negative input are answered directly; every other value goes through normalizeRoundAndPackFloat32. */
        !          1133:     flag zSign;
        !          1134:
        !          1135:     if ( a == 0 ) return 0;   /* returns 0 as the float32 bit pattern */
        !          1136:     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );   /* most negative input: sign 1, exponent field 0x9E (127 + 31), zero fraction; handled here so the negation below never sees it */
        !          1137:     zSign = ( a < 0 );
        !          1138:     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );   /* passes the magnitude of a; normalization and rounding are left to the helper */
        !          1139:
        !          1140: }
        !          1141:
        !          1142: /*
        !          1143: -------------------------------------------------------------------------------
        !          1144: Returns the result of converting the 32-bit two's complement integer `a'
        !          1145: to the double-precision floating-point format.  The conversion is performed
        !          1146: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1147: -------------------------------------------------------------------------------
        !          1148: */
        !          1149: float64 int32_to_float64( int32 a )
        !          1150: {   /* No rounding helper is called: the magnitude is shifted into place and packed directly. */
        !          1151:     flag zSign;
        !          1152:     uint32 absA;       /* magnitude of a */
        !          1153:     int8 shiftCount;
        !          1154:     bits64 zSig;
        !          1155:
        !          1156:     if ( a == 0 ) return 0;   /* returns 0 as the float64 bit pattern */
        !          1157:     zSign = ( a < 0 );
        !          1158:     absA = zSign ? - (uint32) a : (uint32) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1159:     shiftCount = countLeadingZeros32( absA ) + 21;   /* moves the leading 1 of absA to bit 52 */
        !          1160:     zSig = absA;
        !          1161:     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
        !          1162:
        !          1163: }
        !          1164:
        !          1165: #ifdef FLOATX80
        !          1166:
        !          1167: /*
        !          1168: -------------------------------------------------------------------------------
        !          1169: Returns the result of converting the 32-bit two's complement integer `a'
        !          1170: to the extended double-precision floating-point format.  The conversion
        !          1171: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1172: Arithmetic.
        !          1173: -------------------------------------------------------------------------------
        !          1174: */
        !          1175: floatx80 int32_to_floatx80( int32 a )
        !          1176: {   /* No rounding helper is called: the magnitude is shifted into place and packed directly. */
        !          1177:     flag zSign;
        !          1178:     uint32 absA;       /* magnitude of a */
        !          1179:     int8 shiftCount;
        !          1180:     bits64 zSig;
        !          1181:
        !          1182:     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
        !          1183:     zSign = ( a < 0 );
        !          1184:     absA = zSign ? - (uint32) a : (uint32) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1185:     shiftCount = countLeadingZeros32( absA ) + 32;   /* moves the leading 1 of absA to bit 63 */
        !          1186:     zSig = absA;
        !          1187:     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );   /* 0x403E = 0x3FFF + 63 */
        !          1188:
        !          1189: }
        !          1190:
        !          1191: #endif
        !          1192:
        !          1193: #ifdef FLOAT128
        !          1194:
        !          1195: /*
        !          1196: -------------------------------------------------------------------------------
        !          1197: Returns the result of converting the 32-bit two's complement integer `a' to
        !          1198: the quadruple-precision floating-point format.  The conversion is performed
        !          1199: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1200: -------------------------------------------------------------------------------
        !          1201: */
        !          1202: float128 int32_to_float128( int32 a )
        !          1203: {   /* No rounding helper is called: the magnitude is shifted into the high word and packed directly. */
        !          1204:     flag zSign;
        !          1205:     uint32 absA;       /* magnitude of a */
        !          1206:     int8 shiftCount;
        !          1207:     bits64 zSig0;      /* high significand word; the low word is passed as 0 */
        !          1208:
        !          1209:     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
        !          1210:     zSign = ( a < 0 );
        !          1211:     absA = zSign ? - (uint32) a : (uint32) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1212:     shiftCount = countLeadingZeros32( absA ) + 17;   /* moves the leading 1 of absA to bit 48 of zSig0 */
        !          1213:     zSig0 = absA;
        !          1214:     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
        !          1215:
        !          1216: }
        !          1217:
        !          1218: #endif
        !          1219:
        !          1220: #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
        !          1221: /*
        !          1222: -------------------------------------------------------------------------------
        !          1223: Returns the result of converting the 64-bit two's complement integer `a'
        !          1224: to the single-precision floating-point format.  The conversion is performed
        !          1225: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1226: -------------------------------------------------------------------------------
        !          1227: */
        !          1228: float32 int64_to_float32( int64 a )
        !          1229: {   /* Packs directly when the magnitude fits in 24 bits; otherwise goes through roundAndPackFloat32. */
        !          1230:     flag zSign;
        !          1231:     uint64 absA;       /* magnitude of a; reused as the significand passed on */
        !          1232:     int8 shiftCount;
        !          1233:
        !          1234:     if ( a == 0 ) return 0;   /* returns 0 as the float32 bit pattern */
        !          1235:     zSign = ( a < 0 );
        !          1236:     absA = zSign ? - (uint64) a : (uint64) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1237:     shiftCount = countLeadingZeros64( absA ) - 40;   /* left shift that would put the leading 1 at bit 23 */
        !          1238:     if ( 0 <= shiftCount ) {   /* at most 24 significant bits: pack without the rounding helper */
        !          1239:         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
        !          1240:     }
        !          1241:     else {
        !          1242:         shiftCount += 7;   /* retarget the leading 1 to bit 30, the position used for roundAndPackFloat32 below */
        !          1243:         if ( shiftCount < 0 ) {
        !          1244:             shift64RightJamming( absA, - shiftCount, &absA );   /* too wide: shift right (presumably keeping a sticky bit for the lost bits) */
        !          1245:         }
        !          1246:         else {
        !          1247:             absA <<= shiftCount;
        !          1248:         }
        !          1249:         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
        !          1250:     }
        !          1251:
        !          1252: }
        !          1253:
        !          1254: /*
        !          1255: -------------------------------------------------------------------------------
        !          1256: Returns the result of converting the 64-bit two's complement integer `a'
        !          1257: to the double-precision floating-point format.  The conversion is performed
        !          1258: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1259: -------------------------------------------------------------------------------
        !          1260: */
        !          1261: float64 int64_to_float64( int64 a )
        !          1262: {   /* Zero and the most negative input are answered directly; every other value goes through normalizeRoundAndPackFloat64. */
        !          1263:     flag zSign;
        !          1264:
        !          1265:     if ( a == 0 ) return 0;   /* returns 0 as the float64 bit pattern */
        !          1266:     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {   /* most negative input, handled here so the negation below never sees it */
        !          1267:         return packFloat64( 1, 0x43E, 0 );   /* sign 1, exponent field 0x43E (0x3FF + 63), zero fraction */
        !          1268:     }
        !          1269:     zSign = ( a < 0 );
        !          1270:     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );   /* passes the magnitude of a; normalization and rounding are left to the helper */
        !          1271:
        !          1272: }
        !          1273:
        !          1274: #ifdef FLOATX80
        !          1275:
        !          1276: /*
        !          1277: -------------------------------------------------------------------------------
        !          1278: Returns the result of converting the 64-bit two's complement integer `a'
        !          1279: to the extended double-precision floating-point format.  The conversion
        !          1280: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1281: Arithmetic.
        !          1282: -------------------------------------------------------------------------------
        !          1283: */
        !          1284: floatx80 int64_to_floatx80( int64 a )
        !          1285: {   /* No rounding helper is called: the 64-bit magnitude is shifted into place and packed directly. */
        !          1286:     flag zSign;
        !          1287:     uint64 absA;       /* magnitude of a */
        !          1288:     int8 shiftCount;
        !          1289:
        !          1290:     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
        !          1291:     zSign = ( a < 0 );
        !          1292:     absA = zSign ? - (uint64) a : (uint64) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1293:     shiftCount = countLeadingZeros64( absA );   /* moves the leading 1 of absA to bit 63 */
        !          1294:     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );   /* 0x403E = 0x3FFF + 63 */
        !          1295:
        !          1296: }
        !          1297:
        !          1298: #endif
        !          1299:
        !          1300: #ifdef FLOAT128
        !          1301:
        !          1302: /*
        !          1303: -------------------------------------------------------------------------------
        !          1304: Returns the result of converting the 64-bit two's complement integer `a' to
        !          1305: the quadruple-precision floating-point format.  The conversion is performed
        !          1306: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1307: -------------------------------------------------------------------------------
        !          1308: */
        !          1309: float128 int64_to_float128( int64 a )
        !          1310: {   /* No rounding helper is called: the magnitude is placed in a 128-bit significand, shifted into position and packed. */
        !          1311:     flag zSign;
        !          1312:     uint64 absA;       /* magnitude of a */
        !          1313:     int8 shiftCount;
        !          1314:     int32 zExp;
        !          1315:     bits64 zSig0, zSig1;   /* high and low significand words */
        !          1316:
        !          1317:     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
        !          1318:     zSign = ( a < 0 );
        !          1319:     absA = zSign ? - (uint64) a : (uint64) a;   /* negate in the unsigned type: a signed "- a" overflows (undefined behaviour) for the most negative input */
        !          1320:     shiftCount = countLeadingZeros64( absA ) + 49;   /* total left shift, counted from absA sitting in the low word */
        !          1321:     zExp = 0x406E - shiftCount;   /* computed before shiftCount is reduced below */
        !          1322:     if ( 64 <= shiftCount ) {   /* a shift of 64 or more: start with absA in the high word and shift by the remainder */
        !          1323:         zSig1 = 0;
        !          1324:         zSig0 = absA;
        !          1325:         shiftCount -= 64;
        !          1326:     }
        !          1327:     else {
        !          1328:         zSig1 = absA;
        !          1329:         zSig0 = 0;
        !          1330:     }
        !          1331:     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );   /* remaining shift is below 64 on both paths */
        !          1332:     return packFloat128( zSign, zExp, zSig0, zSig1 );
        !          1333:
        !          1334: }
        !          1335:
        !          1336: #endif
        !          1337: #endif /* !SOFTFLOAT_FOR_GCC */
        !          1338:
        !          1339: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          1340: /*
        !          1341: -------------------------------------------------------------------------------
        !          1342: Returns the result of converting the single-precision floating-point value
        !          1343: `a' to the 32-bit two's complement integer format.  The conversion is
        !          1344: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1345: Arithmetic---which means in particular that the conversion is rounded
        !          1346: according to the current rounding mode.  If `a' is a NaN, the largest
        !          1347: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          1348: largest integer with the same sign as `a' is returned.
        !          1349: -------------------------------------------------------------------------------
        !          1350: */
        !          1351: int32 float32_to_int32( float32 a )
        !          1352: {   /* Builds a 64-bit fixed-point magnitude from a and leaves rounding and range handling to roundAndPackInt32. */
        !          1353:     flag aSign;
        !          1354:     int16 aExp, shiftCount;
        !          1355:     bits32 aSig;
        !          1356:     bits64 aSig64;
        !          1357:
        !          1358:     aSig = extractFloat32Frac( a );
        !          1359:     aExp = extractFloat32Exp( a );
        !          1360:     aSign = extractFloat32Sign( a );
        !          1361:     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;   /* NaN (exponent 0xFF, nonzero fraction): treat as positive */
        !          1362:     if ( aExp ) aSig |= 0x00800000;   /* nonzero exponent field: set bit 23, the implicit leading bit */
        !          1363:     shiftCount = 0xAF - aExp;
        !          1364:     aSig64 = aSig;
        !          1365:     aSig64 <<= 32;   /* widen so the right shift below has room for low-order bits */
        !          1366:     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );   /* no shift when aExp >= 0xAF; the unshifted value is passed on as is */
        !          1367:     return roundAndPackInt32( aSign, aSig64 );
        !          1368:
        !          1369: }
        !          1370: #endif /* !SOFTFLOAT_FOR_GCC */
        !          1371:
        !          1372: /*
        !          1373: -------------------------------------------------------------------------------
        !          1374: Returns the result of converting the single-precision floating-point value
        !          1375: `a' to the 32-bit two's complement integer format.  The conversion is
        !          1376: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1377: Arithmetic, except that the conversion is always rounded toward zero.
        !          1378: If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
        !          1379: the conversion overflows, the largest integer with the same sign as `a' is
        !          1380: returned.
        !          1381: -------------------------------------------------------------------------------
        !          1382: */
        !          1383: int32 float32_to_int32_round_to_zero( float32 a )
        !          1384: {   /* Truncating conversion done inline: handles out-of-range, then magnitudes below 1, then the general shift. */
        !          1385:     flag aSign;
        !          1386:     int16 aExp, shiftCount;
        !          1387:     bits32 aSig;
        !          1388:     int32 z;
        !          1389:
        !          1390:     aSig = extractFloat32Frac( a );
        !          1391:     aExp = extractFloat32Exp( a );
        !          1392:     aSign = extractFloat32Sign( a );
        !          1393:     shiftCount = aExp - 0x9E;   /* 0x9E = 127 + 31; non-negative means the exponent is 31 or more */
        !          1394:     if ( 0 <= shiftCount ) {
        !          1395:         if ( a != 0xCF000000 ) {   /* 0xCF000000: sign 1, exponent field 0x9E, zero fraction; the only input here that is not flagged invalid */
        !          1396:             float_raise( float_flag_invalid );
        !          1397:             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;   /* positive input or NaN: largest positive integer */
        !          1398:         }
        !          1399:         return (sbits32) 0x80000000;   /* 0xCF000000 itself, or any other negative out-of-range input */
        !          1400:     }
        !          1401:     else if ( aExp <= 0x7E ) {   /* exponent field below 0x7F: result is 0 */
        !          1402:         if ( aExp | aSig ) float_set_inexact();   /* inexact unless both exponent and fraction are zero */
        !          1403:         return 0;
        !          1404:     }
        !          1405:     aSig = ( aSig | 0x00800000 )<<8;   /* add the implicit leading bit and move it to bit 31 */
        !          1406:     z = aSig>>( - shiftCount );   /* shiftCount is negative here, so this keeps the integer part */
        !          1407:     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {   /* the bits discarded by the right shift; nonzero means a fraction was dropped */
        !          1408:         float_set_inexact();
        !          1409:     }
        !          1410:     if ( aSign ) z = - z;
        !          1411:     return z;
        !          1412:
        !          1413: }
        !          1414:
        !          1415: #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
        !          1416: /*
        !          1417: -------------------------------------------------------------------------------
        !          1418: Returns the result of converting the single-precision floating-point value
        !          1419: `a' to the 64-bit two's complement integer format.  The conversion is
        !          1420: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1421: Arithmetic---which means in particular that the conversion is rounded
        !          1422: according to the current rounding mode.  If `a' is a NaN, the largest
        !          1423: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          1424: largest integer with the same sign as `a' is returned.
        !          1425: -------------------------------------------------------------------------------
        !          1426: */
        !          1427: int64 float32_to_int64( float32 a )
        !          1428: {   /* Rejects exponents above 0xBE here; otherwise builds a fixed-point value and leaves rounding to roundAndPackInt64. */
        !          1429:     flag aSign;
        !          1430:     int16 aExp, shiftCount;
        !          1431:     bits32 aSig;
        !          1432:     bits64 aSig64, aSigExtra;
        !          1433:
        !          1434:     aSig = extractFloat32Frac( a );
        !          1435:     aExp = extractFloat32Exp( a );
        !          1436:     aSign = extractFloat32Sign( a );
        !          1437:     shiftCount = 0xBE - aExp;   /* 0xBE = 127 + 63 */
        !          1438:     if ( shiftCount < 0 ) {   /* exponent field above 0xBE: raise invalid and return a saturated value */
        !          1439:         float_raise( float_flag_invalid );
        !          1440:         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {   /* positive input or NaN */
        !          1441:             return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          1442:         }
        !          1443:         return (sbits64) LIT64( 0x8000000000000000 );
        !          1444:     }
        !          1445:     if ( aExp ) aSig |= 0x00800000;   /* nonzero exponent field: set bit 23, the implicit leading bit */
        !          1446:     aSig64 = aSig;
        !          1447:     aSig64 <<= 40;   /* moves bit 23 up to bit 63 */
        !          1448:     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );   /* aSigExtra receives the second output word (presumably the shifted-out bits) */
        !          1449:     return roundAndPackInt64( aSign, aSig64, aSigExtra );
        !          1450:
        !          1451: }
        !          1452:
        !          1453: /*
        !          1454: -------------------------------------------------------------------------------
        !          1455: Returns the result of converting the single-precision floating-point value
        !          1456: `a' to the 64-bit two's complement integer format.  The conversion is
        !          1457: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1458: Arithmetic, except that the conversion is always rounded toward zero.  If
        !          1459: `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
        !          1460: conversion overflows, the largest integer with the same sign as `a' is
        !          1461: returned.
        !          1462: -------------------------------------------------------------------------------
        !          1463: */
        !          1464: int64 float32_to_int64_round_to_zero( float32 a )
        !          1465: {   /* Truncating conversion done inline: handles out-of-range, then magnitudes below 1, then the general shift. */
        !          1466:     flag aSign;
        !          1467:     int16 aExp, shiftCount;
        !          1468:     bits32 aSig;
        !          1469:     bits64 aSig64;
        !          1470:     int64 z;
        !          1471:
        !          1472:     aSig = extractFloat32Frac( a );
        !          1473:     aExp = extractFloat32Exp( a );
        !          1474:     aSign = extractFloat32Sign( a );
        !          1475:     shiftCount = aExp - 0xBE;   /* 0xBE = 127 + 63; non-negative means the exponent is 63 or more */
        !          1476:     if ( 0 <= shiftCount ) {
        !          1477:         if ( a != 0xDF000000 ) {   /* 0xDF000000: sign 1, exponent field 0xBE, zero fraction; the only input here that is not flagged invalid */
        !          1478:             float_raise( float_flag_invalid );
        !          1479:             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {   /* positive input or NaN */
        !          1480:                 return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          1481:             }
        !          1482:         }
        !          1483:         return (sbits64) LIT64( 0x8000000000000000 );   /* 0xDF000000 itself, or any other negative out-of-range input */
        !          1484:     }
        !          1485:     else if ( aExp <= 0x7E ) {   /* exponent field below 0x7F: result is 0 */
        !          1486:         if ( aExp | aSig ) float_set_inexact();   /* inexact unless both exponent and fraction are zero */
        !          1487:         return 0;
        !          1488:     }
        !          1489:     aSig64 = aSig | 0x00800000;   /* add the implicit leading bit */
        !          1490:     aSig64 <<= 40;   /* moves bit 23 up to bit 63 */
        !          1491:     z = aSig64>>( - shiftCount );   /* shiftCount is negative here, so this keeps the integer part */
        !          1492:     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {   /* the bits discarded by the right shift; nonzero means a fraction was dropped */
        !          1493:         float_set_inexact();
        !          1494:     }
        !          1495:     if ( aSign ) z = - z;
        !          1496:     return z;
        !          1497:
        !          1498: }
        !          1499: #endif /* !SOFTFLOAT_FOR_GCC */
        !          1500:
        !          1501: /*
        !          1502: -------------------------------------------------------------------------------
        !          1503: Returns the result of converting the single-precision floating-point value
        !          1504: `a' to the double-precision floating-point format.  The conversion is
        !          1505: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1506: Arithmetic.
        !          1507: -------------------------------------------------------------------------------
        !          1508: */
        !          1509: float64 float32_to_float64( float32 a )
        !          1510: {   /* Widens a float32: NaN, infinity, zero and subnormals are special-cased, then exponent and fraction are re-packed. */
        !          1511:     flag aSign;
        !          1512:     int16 aExp;
        !          1513:     bits32 aSig;
        !          1514:
        !          1515:     aSig = extractFloat32Frac( a );
        !          1516:     aExp = extractFloat32Exp( a );
        !          1517:     aSign = extractFloat32Sign( a );
        !          1518:     if ( aExp == 0xFF ) {   /* all-ones exponent: NaN if the fraction is nonzero, otherwise infinity */
        !          1519:         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
        !          1520:         return packFloat64( aSign, 0x7FF, 0 );
        !          1521:     }
        !          1522:     if ( aExp == 0 ) {   /* zero or subnormal */
        !          1523:         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );   /* zero keeps its sign */
        !          1524:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          1525:         --aExp;   /* NOTE(review): presumably offsets the leading bit left in aSig, which packFloat64 would add into the exponent field; confirm against packFloat64 */
        !          1526:     }
        !          1527:     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );   /* 0x380 = 0x3FF - 0x7F (bias difference); 29 = 52 - 23 (fraction widths) */
        !          1528:
        !          1529: }
        !          1530:
        !          1531: #ifdef FLOATX80
        !          1532:
        !          1533: /*
        !          1534: -------------------------------------------------------------------------------
        !          1535: Returns the result of converting the single-precision floating-point value
        !          1536: `a' to the extended double-precision floating-point format.  The conversion
        !          1537: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1538: Arithmetic.
        !          1539: -------------------------------------------------------------------------------
        !          1540: */
        !          1541: floatx80 float32_to_floatx80( float32 a )
        !          1542: {   /* Widens a float32: NaN, infinity, zero and subnormals are special-cased, then exponent and significand are re-packed. */
        !          1543:     flag aSign;
        !          1544:     int16 aExp;
        !          1545:     bits32 aSig;
        !          1546:
        !          1547:     aSig = extractFloat32Frac( a );
        !          1548:     aExp = extractFloat32Exp( a );
        !          1549:     aSign = extractFloat32Sign( a );
        !          1550:     if ( aExp == 0xFF ) {   /* all-ones exponent: NaN if the fraction is nonzero, otherwise infinity */
        !          1551:         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
        !          1552:         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );   /* infinity is packed with bit 63 of the significand set */
        !          1553:     }
        !          1554:     if ( aExp == 0 ) {   /* zero or subnormal */
        !          1555:         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );   /* zero keeps its sign */
        !          1556:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          1557:     }
        !          1558:     aSig |= 0x00800000;   /* set bit 23 explicitly; it becomes bit 63 after the shift below */
        !          1559:     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );   /* 0x3F80 = 0x3FFF - 0x7F (bias difference) */
        !          1560:
        !          1561: }
        !          1562:
        !          1563: #endif
        !          1564:
        !          1565: #ifdef FLOAT128
        !          1566:
        !          1567: /*
        !          1568: -------------------------------------------------------------------------------
        !          1569: Returns the result of converting the single-precision floating-point value
        !          1570: `a' to the quadruple-precision floating-point format.  The conversion is
        !          1571: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          1572: Arithmetic.
        !          1573: -------------------------------------------------------------------------------
        !          1574: */
        !          1575: float128 float32_to_float128( float32 a )
        !          1576: {   /* Widens a float32 to float128: NaN, infinity, zero and subnormals are special-cased, then exponent and fraction are re-packed. */
        !          1577:     flag aSign;
        !          1578:     int16 aExp;
        !          1579:     bits32 aSig;
        !          1580:
        !          1581:     aSig = extractFloat32Frac( a );
        !          1582:     aExp = extractFloat32Exp( a );
        !          1583:     aSign = extractFloat32Sign( a );
        !          1584:     if ( aExp == 0xFF ) {   /* all-ones exponent: NaN if the fraction is nonzero, otherwise infinity */
        !          1585:         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
        !          1586:         return packFloat128( aSign, 0x7FFF, 0, 0 );
        !          1587:     }
        !          1588:     if ( aExp == 0 ) {   /* zero or subnormal */
        !          1589:         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );   /* zero keeps its sign */
        !          1590:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          1591:         --aExp;   /* NOTE(review): presumably offsets the leading bit left in aSig, which packFloat128 would add into the exponent field; confirm against packFloat128 */
        !          1592:     }
        !          1593:     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );   /* 0x3F80 = 0x3FFF - 0x7F (bias difference); 25 = 48 - 23 (high-word fraction width minus float32 fraction width) */
        !          1594:
        !          1595: }
        !          1596:
        !          1597: #endif
        !          1598:
        !          1599: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          1600: /*
        !          1601: -------------------------------------------------------------------------------
        !          1602: Rounds the single-precision floating-point value `a' to an integer, and
        !          1603: returns the result as a single-precision floating-point value.  The
        !          1604: operation is performed according to the IEC/IEEE Standard for Binary
        !          1605: Floating-Point Arithmetic.
        !          1606: -------------------------------------------------------------------------------
        !          1607: */
        !          1608: float32 float32_round_to_int( float32 a )
        !          1609: {
        !          1610:     flag aSign;
        !          1611:     int16 aExp;
        !          1612:     bits32 lastBitMask, roundBitsMask;
        !          1613:     int8 roundingMode;
        !          1614:     float32 z;
        !          1615:
        !          1616:     aExp = extractFloat32Exp( a );
        !          1617:     if ( 0x96 <= aExp ) {
        !          1618:         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
        !          1619:             return propagateFloat32NaN( a, a );
        !          1620:         }
        !          1621:         return a;
        !          1622:     }
        !          1623:     if ( aExp <= 0x7E ) {
        !          1624:         if ( (bits32) ( a<<1 ) == 0 ) return a;
        !          1625:         float_set_inexact();
        !          1626:         aSign = extractFloat32Sign( a );
        !          1627:         switch ( float_rounding_mode() ) {
        !          1628:          case float_round_nearest_even:
        !          1629:             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
        !          1630:                 return packFloat32( aSign, 0x7F, 0 );
        !          1631:             }
        !          1632:             break;
        !          1633:          case float_round_down:
        !          1634:             return aSign ? 0xBF800000 : 0;
        !          1635:          case float_round_up:
        !          1636:             return aSign ? 0x80000000 : 0x3F800000;
        !          1637:         }
        !          1638:         return packFloat32( aSign, 0, 0 );
        !          1639:     }
        !          1640:     lastBitMask = 1;
        !          1641:     lastBitMask <<= 0x96 - aExp;
        !          1642:     roundBitsMask = lastBitMask - 1;
        !          1643:     z = a;
        !          1644:     roundingMode = float_rounding_mode();
        !          1645:     if ( roundingMode == float_round_nearest_even ) {
        !          1646:         z += lastBitMask>>1;
        !          1647:         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
        !          1648:     }
        !          1649:     else if ( roundingMode != float_round_to_zero ) {
        !          1650:         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
        !          1651:             z += roundBitsMask;
        !          1652:         }
        !          1653:     }
        !          1654:     z &= ~ roundBitsMask;
        !          1655:     if ( z != a ) float_set_inexact();
        !          1656:     return z;
        !          1657:
        !          1658: }
        !          1659: #endif /* !SOFTFLOAT_FOR_GCC */
        !          1660:
        !          1661: /*
        !          1662: -------------------------------------------------------------------------------
        !          1663: Returns the result of adding the absolute values of the single-precision
        !          1664: floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
        !          1665: before being returned.  `zSign' is ignored if the result is a NaN.
        !          1666: The addition is performed according to the IEC/IEEE Standard for Binary
        !          1667: Floating-Point Arithmetic.
        !          1668: -------------------------------------------------------------------------------
        !          1669: */
        !          1670: static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
        !          1671: {
        !          1672:     int16 aExp, bExp, zExp;
        !          1673:     bits32 aSig, bSig, zSig;
        !          1674:     int16 expDiff;
        !          1675:
        !          1676:     aSig = extractFloat32Frac( a );
        !          1677:     aExp = extractFloat32Exp( a );
        !          1678:     bSig = extractFloat32Frac( b );
        !          1679:     bExp = extractFloat32Exp( b );
        !          1680:     expDiff = aExp - bExp;
        !          1681:     aSig <<= 6;
        !          1682:     bSig <<= 6;
        !          1683:     if ( 0 < expDiff ) {
        !          1684:         if ( aExp == 0xFF ) {
        !          1685:             if ( aSig ) return propagateFloat32NaN( a, b );
        !          1686:             return a;
        !          1687:         }
        !          1688:         if ( bExp == 0 ) {
        !          1689:             --expDiff;
        !          1690:         }
        !          1691:         else {
        !          1692:             bSig |= 0x20000000;
        !          1693:         }
        !          1694:         shift32RightJamming( bSig, expDiff, &bSig );
        !          1695:         zExp = aExp;
        !          1696:     }
        !          1697:     else if ( expDiff < 0 ) {
        !          1698:         if ( bExp == 0xFF ) {
        !          1699:             if ( bSig ) return propagateFloat32NaN( a, b );
        !          1700:             return packFloat32( zSign, 0xFF, 0 );
        !          1701:         }
        !          1702:         if ( aExp == 0 ) {
        !          1703:             ++expDiff;
        !          1704:         }
        !          1705:         else {
        !          1706:             aSig |= 0x20000000;
        !          1707:         }
        !          1708:         shift32RightJamming( aSig, - expDiff, &aSig );
        !          1709:         zExp = bExp;
        !          1710:     }
        !          1711:     else {
        !          1712:         if ( aExp == 0xFF ) {
        !          1713:             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
        !          1714:             return a;
        !          1715:         }
        !          1716:         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
        !          1717:         zSig = 0x40000000 + aSig + bSig;
        !          1718:         zExp = aExp;
        !          1719:         goto roundAndPack;
        !          1720:     }
        !          1721:     aSig |= 0x20000000;
        !          1722:     zSig = ( aSig + bSig )<<1;
        !          1723:     --zExp;
        !          1724:     if ( (sbits32) zSig < 0 ) {
        !          1725:         zSig = aSig + bSig;
        !          1726:         ++zExp;
        !          1727:     }
        !          1728:  roundAndPack:
        !          1729:     return roundAndPackFloat32( zSign, zExp, zSig );
        !          1730:
        !          1731: }
        !          1732:
        !          1733: /*
        !          1734: -------------------------------------------------------------------------------
        !          1735: Returns the result of subtracting the absolute values of the single-
        !          1736: precision floating-point values `a' and `b'.  If `zSign' is 1, the
        !          1737: difference is negated before being returned.  `zSign' is ignored if the
        !          1738: result is a NaN.  The subtraction is performed according to the IEC/IEEE
        !          1739: Standard for Binary Floating-Point Arithmetic.
        !          1740: -------------------------------------------------------------------------------
        !          1741: */
        !          1742: static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
        !          1743: {
        !          1744:     int16 aExp, bExp, zExp;
        !          1745:     bits32 aSig, bSig, zSig;
        !          1746:     int16 expDiff;
        !          1747:
        !          1748:     aSig = extractFloat32Frac( a );
        !          1749:     aExp = extractFloat32Exp( a );
        !          1750:     bSig = extractFloat32Frac( b );
        !          1751:     bExp = extractFloat32Exp( b );
        !          1752:     expDiff = aExp - bExp;
        !          1753:     aSig <<= 7;
        !          1754:     bSig <<= 7;
        !          1755:     if ( 0 < expDiff ) goto aExpBigger;
        !          1756:     if ( expDiff < 0 ) goto bExpBigger;
        !          1757:     if ( aExp == 0xFF ) {
        !          1758:         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
        !          1759:         float_raise( float_flag_invalid );
        !          1760:         return float32_default_nan;
        !          1761:     }
        !          1762:     if ( aExp == 0 ) {
        !          1763:         aExp = 1;
        !          1764:         bExp = 1;
        !          1765:     }
        !          1766:     if ( bSig < aSig ) goto aBigger;
        !          1767:     if ( aSig < bSig ) goto bBigger;
        !          1768:     return packFloat32( float_rounding_mode() == float_round_down, 0, 0 );
        !          1769:  bExpBigger:
        !          1770:     if ( bExp == 0xFF ) {
        !          1771:         if ( bSig ) return propagateFloat32NaN( a, b );
        !          1772:         return packFloat32( zSign ^ 1, 0xFF, 0 );
        !          1773:     }
        !          1774:     if ( aExp == 0 ) {
        !          1775:         ++expDiff;
        !          1776:     }
        !          1777:     else {
        !          1778:         aSig |= 0x40000000;
        !          1779:     }
        !          1780:     shift32RightJamming( aSig, - expDiff, &aSig );
        !          1781:     bSig |= 0x40000000;
        !          1782:  bBigger:
        !          1783:     zSig = bSig - aSig;
        !          1784:     zExp = bExp;
        !          1785:     zSign ^= 1;
        !          1786:     goto normalizeRoundAndPack;
        !          1787:  aExpBigger:
        !          1788:     if ( aExp == 0xFF ) {
        !          1789:         if ( aSig ) return propagateFloat32NaN( a, b );
        !          1790:         return a;
        !          1791:     }
        !          1792:     if ( bExp == 0 ) {
        !          1793:         --expDiff;
        !          1794:     }
        !          1795:     else {
        !          1796:         bSig |= 0x40000000;
        !          1797:     }
        !          1798:     shift32RightJamming( bSig, expDiff, &bSig );
        !          1799:     aSig |= 0x40000000;
        !          1800:  aBigger:
        !          1801:     zSig = aSig - bSig;
        !          1802:     zExp = aExp;
        !          1803:  normalizeRoundAndPack:
        !          1804:     --zExp;
        !          1805:     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
        !          1806:
        !          1807: }
        !          1808:
        !          1809: /*
        !          1810: -------------------------------------------------------------------------------
        !          1811: Returns the result of adding the single-precision floating-point values `a'
        !          1812: and `b'.  The operation is performed according to the IEC/IEEE Standard for
        !          1813: Binary Floating-Point Arithmetic.
        !          1814: -------------------------------------------------------------------------------
        !          1815: */
        !          1816: float32 float32_add( float32 a, float32 b )
        !          1817: {
        !          1818:     flag aSign, bSign;
        !          1819:
        !          1820:     aSign = extractFloat32Sign( a );
        !          1821:     bSign = extractFloat32Sign( b );
        !          1822:     if ( aSign == bSign ) {
        !          1823:         return addFloat32Sigs( a, b, aSign );
        !          1824:     }
        !          1825:     else {
        !          1826:         return subFloat32Sigs( a, b, aSign );
        !          1827:     }
        !          1828:
        !          1829: }
        !          1830:
        !          1831: /*
        !          1832: -------------------------------------------------------------------------------
        !          1833: Returns the result of subtracting the single-precision floating-point values
        !          1834: `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
        !          1835: for Binary Floating-Point Arithmetic.
        !          1836: -------------------------------------------------------------------------------
        !          1837: */
        !          1838: float32 float32_sub( float32 a, float32 b )
        !          1839: {
        !          1840:     flag aSign, bSign;
        !          1841:
        !          1842:     aSign = extractFloat32Sign( a );
        !          1843:     bSign = extractFloat32Sign( b );
        !          1844:     if ( aSign == bSign ) {
        !          1845:         return subFloat32Sigs( a, b, aSign );
        !          1846:     }
        !          1847:     else {
        !          1848:         return addFloat32Sigs( a, b, aSign );
        !          1849:     }
        !          1850:
        !          1851: }
        !          1852:
        !          1853: /*
        !          1854: -------------------------------------------------------------------------------
        !          1855: Returns the result of multiplying the single-precision floating-point values
        !          1856: `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
        !          1857: for Binary Floating-Point Arithmetic.
        !          1858: -------------------------------------------------------------------------------
        !          1859: */
        !          1860: float32 float32_mul( float32 a, float32 b )
        !          1861: {
        !          1862:     flag aSign, bSign, zSign;
        !          1863:     int16 aExp, bExp, zExp;
        !          1864:     bits32 aSig, bSig;
        !          1865:     bits64 zSig64;
        !          1866:     bits32 zSig;
        !          1867:
        !          1868:     aSig = extractFloat32Frac( a );
        !          1869:     aExp = extractFloat32Exp( a );
        !          1870:     aSign = extractFloat32Sign( a );
        !          1871:     bSig = extractFloat32Frac( b );
        !          1872:     bExp = extractFloat32Exp( b );
        !          1873:     bSign = extractFloat32Sign( b );
        !          1874:     zSign = aSign ^ bSign;
        !          1875:     if ( aExp == 0xFF ) {
        !          1876:         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
        !          1877:             return propagateFloat32NaN( a, b );
        !          1878:         }
        !          1879:         if ( ( bExp | bSig ) == 0 ) {
        !          1880:             float_raise( float_flag_invalid );
        !          1881:             return float32_default_nan;
        !          1882:         }
        !          1883:         return packFloat32( zSign, 0xFF, 0 );
        !          1884:     }
        !          1885:     if ( bExp == 0xFF ) {
        !          1886:         if ( bSig ) return propagateFloat32NaN( a, b );
        !          1887:         if ( ( aExp | aSig ) == 0 ) {
        !          1888:             float_raise( float_flag_invalid );
        !          1889:             return float32_default_nan;
        !          1890:         }
        !          1891:         return packFloat32( zSign, 0xFF, 0 );
        !          1892:     }
        !          1893:     if ( aExp == 0 ) {
        !          1894:         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
        !          1895:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          1896:     }
        !          1897:     if ( bExp == 0 ) {
        !          1898:         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
        !          1899:         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
        !          1900:     }
        !          1901:     zExp = aExp + bExp - 0x7F;
        !          1902:     aSig = ( aSig | 0x00800000 )<<7;
        !          1903:     bSig = ( bSig | 0x00800000 )<<8;
        !          1904:     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
        !          1905:     zSig = zSig64;
        !          1906:     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
        !          1907:         zSig <<= 1;
        !          1908:         --zExp;
        !          1909:     }
        !          1910:     return roundAndPackFloat32( zSign, zExp, zSig );
        !          1911:
        !          1912: }
        !          1913:
        !          1914: /*
        !          1915: -------------------------------------------------------------------------------
        !          1916: Returns the result of dividing the single-precision floating-point value `a'
        !          1917: by the corresponding value `b'.  The operation is performed according to the
        !          1918: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1919: -------------------------------------------------------------------------------
        !          1920: */
        !          1921: float32 float32_div( float32 a, float32 b )
        !          1922: {
        !          1923:     flag aSign, bSign, zSign;
        !          1924:     int16 aExp, bExp, zExp;
        !          1925:     bits32 aSig, bSig, zSig;
        !          1926:
        !          1927:     aSig = extractFloat32Frac( a );
        !          1928:     aExp = extractFloat32Exp( a );
        !          1929:     aSign = extractFloat32Sign( a );
        !          1930:     bSig = extractFloat32Frac( b );
        !          1931:     bExp = extractFloat32Exp( b );
        !          1932:     bSign = extractFloat32Sign( b );
        !          1933:     zSign = aSign ^ bSign;
        !          1934:     if ( aExp == 0xFF ) {
        !          1935:         if ( aSig ) return propagateFloat32NaN( a, b );
        !          1936:         if ( bExp == 0xFF ) {
        !          1937:             if ( bSig ) return propagateFloat32NaN( a, b );
        !          1938:             float_raise( float_flag_invalid );
        !          1939:             return float32_default_nan;
        !          1940:         }
        !          1941:         return packFloat32( zSign, 0xFF, 0 );
        !          1942:     }
        !          1943:     if ( bExp == 0xFF ) {
        !          1944:         if ( bSig ) return propagateFloat32NaN( a, b );
        !          1945:         return packFloat32( zSign, 0, 0 );
        !          1946:     }
        !          1947:     if ( bExp == 0 ) {
        !          1948:         if ( bSig == 0 ) {
        !          1949:             if ( ( aExp | aSig ) == 0 ) {
        !          1950:                 float_raise( float_flag_invalid );
        !          1951:                 return float32_default_nan;
        !          1952:             }
        !          1953:             float_raise( float_flag_divbyzero );
        !          1954:             return packFloat32( zSign, 0xFF, 0 );
        !          1955:         }
        !          1956:         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
        !          1957:     }
        !          1958:     if ( aExp == 0 ) {
        !          1959:         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
        !          1960:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          1961:     }
        !          1962:     zExp = aExp - bExp + 0x7D;
        !          1963:     aSig = ( aSig | 0x00800000 )<<7;
        !          1964:     bSig = ( bSig | 0x00800000 )<<8;
        !          1965:     if ( bSig <= ( aSig + aSig ) ) {
        !          1966:         aSig >>= 1;
        !          1967:         ++zExp;
        !          1968:     }
        !          1969:     zSig = ( ( (bits64) aSig )<<32 ) / bSig;
        !          1970:     if ( ( zSig & 0x3F ) == 0 ) {
        !          1971:         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
        !          1972:     }
        !          1973:     return roundAndPackFloat32( zSign, zExp, zSig );
        !          1974:
        !          1975: }
        !          1976:
        !          1977: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          1978: /*
        !          1979: -------------------------------------------------------------------------------
        !          1980: Returns the remainder of the single-precision floating-point value `a'
        !          1981: with respect to the corresponding value `b'.  The operation is performed
        !          1982: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          1983: -------------------------------------------------------------------------------
        !          1984: */
        !          1985: float32 float32_rem( float32 a, float32 b )
        !          1986: {
        !          1987:     flag aSign, bSign, zSign;
        !          1988:     int16 aExp, bExp, expDiff;
        !          1989:     bits32 aSig, bSig;
        !          1990:     bits32 q;
        !          1991:     bits64 aSig64, bSig64, q64;
        !          1992:     bits32 alternateASig;
        !          1993:     sbits32 sigMean;
        !          1994:
        !          1995:     aSig = extractFloat32Frac( a );
        !          1996:     aExp = extractFloat32Exp( a );
        !          1997:     aSign = extractFloat32Sign( a );
        !          1998:     bSig = extractFloat32Frac( b );
        !          1999:     bExp = extractFloat32Exp( b );
        !          2000:     bSign = extractFloat32Sign( b );
        !          2001:     if ( aExp == 0xFF ) {
        !          2002:         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
        !          2003:             return propagateFloat32NaN( a, b );
        !          2004:         }
        !          2005:         float_raise( float_flag_invalid );
        !          2006:         return float32_default_nan;
        !          2007:     }
        !          2008:     if ( bExp == 0xFF ) {
        !          2009:         if ( bSig ) return propagateFloat32NaN( a, b );
        !          2010:         return a;
        !          2011:     }
        !          2012:     if ( bExp == 0 ) {
        !          2013:         if ( bSig == 0 ) {
        !          2014:             float_raise( float_flag_invalid );
        !          2015:             return float32_default_nan;
        !          2016:         }
        !          2017:         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
        !          2018:     }
        !          2019:     if ( aExp == 0 ) {
        !          2020:         if ( aSig == 0 ) return a;
        !          2021:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          2022:     }
        !          2023:     expDiff = aExp - bExp;
        !          2024:     aSig |= 0x00800000;
        !          2025:     bSig |= 0x00800000;
        !          2026:     if ( expDiff < 32 ) {
        !          2027:         aSig <<= 8;
        !          2028:         bSig <<= 8;
        !          2029:         if ( expDiff < 0 ) {
        !          2030:             if ( expDiff < -1 ) return a;
        !          2031:             aSig >>= 1;
        !          2032:         }
        !          2033:         q = ( bSig <= aSig );
        !          2034:         if ( q ) aSig -= bSig;
        !          2035:         if ( 0 < expDiff ) {
        !          2036:             q = ( ( (bits64) aSig )<<32 ) / bSig;
        !          2037:             q >>= 32 - expDiff;
        !          2038:             bSig >>= 2;
        !          2039:             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        !          2040:         }
        !          2041:         else {
        !          2042:             aSig >>= 2;
        !          2043:             bSig >>= 2;
        !          2044:         }
        !          2045:     }
        !          2046:     else {
        !          2047:         if ( bSig <= aSig ) aSig -= bSig;
        !          2048:         aSig64 = ( (bits64) aSig )<<40;
        !          2049:         bSig64 = ( (bits64) bSig )<<40;
        !          2050:         expDiff -= 64;
        !          2051:         while ( 0 < expDiff ) {
        !          2052:             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        !          2053:             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        !          2054:             aSig64 = - ( ( bSig * q64 )<<38 );
        !          2055:             expDiff -= 62;
        !          2056:         }
        !          2057:         expDiff += 64;
        !          2058:         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        !          2059:         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        !          2060:         q = q64>>( 64 - expDiff );
        !          2061:         bSig <<= 6;
        !          2062:         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
        !          2063:     }
        !          2064:     do {
        !          2065:         alternateASig = aSig;
        !          2066:         ++q;
        !          2067:         aSig -= bSig;
        !          2068:     } while ( 0 <= (sbits32) aSig );
        !          2069:     sigMean = aSig + alternateASig;
        !          2070:     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        !          2071:         aSig = alternateASig;
        !          2072:     }
        !          2073:     zSign = ( (sbits32) aSig < 0 );
        !          2074:     if ( zSign ) aSig = - aSig;
        !          2075:     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
        !          2076:
        !          2077: }
        !          2078: #endif /* !SOFTFLOAT_FOR_GCC */
        !          2079:
        !          2080: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          2081: /*
        !          2082: -------------------------------------------------------------------------------
        !          2083: Returns the square root of the single-precision floating-point value `a'.
        !          2084: The operation is performed according to the IEC/IEEE Standard for Binary
        !          2085: Floating-Point Arithmetic.
        !          2086: -------------------------------------------------------------------------------
        !          2087: */
        !          2088: float32 float32_sqrt( float32 a )
        !          2089: {
        !          2090:     flag aSign;
        !          2091:     int16 aExp, zExp;
        !          2092:     bits32 aSig, zSig;
        !          2093:     bits64 rem, term;
        !          2094:
        !          2095:     aSig = extractFloat32Frac( a );
        !          2096:     aExp = extractFloat32Exp( a );
        !          2097:     aSign = extractFloat32Sign( a );
        !          2098:     if ( aExp == 0xFF ) {
        !          2099:         if ( aSig ) return propagateFloat32NaN( a, 0 );
        !          2100:         if ( ! aSign ) return a;
        !          2101:         float_raise( float_flag_invalid );
        !          2102:         return float32_default_nan;
        !          2103:     }
        !          2104:     if ( aSign ) {
        !          2105:         if ( ( aExp | aSig ) == 0 ) return a;
        !          2106:         float_raise( float_flag_invalid );
        !          2107:         return float32_default_nan;
        !          2108:     }
        !          2109:     if ( aExp == 0 ) {
        !          2110:         if ( aSig == 0 ) return 0;
        !          2111:         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        !          2112:     }
        !          2113:     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
        !          2114:     aSig = ( aSig | 0x00800000 )<<8;
        !          2115:     zSig = estimateSqrt32( aExp, aSig ) + 2;
        !          2116:     if ( ( zSig & 0x7F ) <= 5 ) {
        !          2117:         if ( zSig < 2 ) {
        !          2118:             zSig = 0x7FFFFFFF;
        !          2119:             goto roundAndPack;
        !          2120:         }
        !          2121:         aSig >>= aExp & 1;
        !          2122:         term = ( (bits64) zSig ) * zSig;
        !          2123:         rem = ( ( (bits64) aSig )<<32 ) - term;
        !          2124:         while ( (sbits64) rem < 0 ) {
        !          2125:             --zSig;
        !          2126:             rem += ( ( (bits64) zSig )<<1 ) | 1;
        !          2127:         }
        !          2128:         zSig |= ( rem != 0 );
        !          2129:     }
        !          2130:     shift32RightJamming( zSig, 1, &zSig );
        !          2131:  roundAndPack:
        !          2132:     return roundAndPackFloat32( 0, zExp, zSig );
        !          2133:
        !          2134: }
        !          2135: #endif /* !SOFTFLOAT_FOR_GCC */
        !          2136:
        !          2137: /*
        !          2138: -------------------------------------------------------------------------------
        !          2139: Returns 1 if the single-precision floating-point value `a' is equal to
        !          2140: the corresponding value `b', and 0 otherwise.  The comparison is performed
        !          2141: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2142: -------------------------------------------------------------------------------
        !          2143: */
        !          2144: flag float32_eq( float32 a, float32 b )
        !          2145: {
        !          2146:
        !          2147:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2148:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2149:        ) {
        !          2150:         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
        !          2151:             float_raise( float_flag_invalid );
        !          2152:         }
        !          2153:         return 0;
        !          2154:     }
        !          2155:     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
        !          2156:
        !          2157: }
        !          2158:
        !          2159: /*
        !          2160: -------------------------------------------------------------------------------
        !          2161: Returns 1 if the single-precision floating-point value `a' is less than
        !          2162: or equal to the corresponding value `b', and 0 otherwise.  The comparison
        !          2163: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2164: Arithmetic.
        !          2165: -------------------------------------------------------------------------------
        !          2166: */
        !          2167: flag float32_le( float32 a, float32 b )
        !          2168: {
        !          2169:     flag aSign, bSign;
        !          2170:
        !          2171:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2172:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2173:        ) {
        !          2174:         float_raise( float_flag_invalid );
        !          2175:         return 0;
        !          2176:     }
        !          2177:     aSign = extractFloat32Sign( a );
        !          2178:     bSign = extractFloat32Sign( b );
        !          2179:     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
        !          2180:     return ( a == b ) || ( aSign ^ ( a < b ) );
        !          2181:
        !          2182: }
        !          2183:
        !          2184: /*
        !          2185: -------------------------------------------------------------------------------
        !          2186: Returns 1 if the single-precision floating-point value `a' is less than
        !          2187: the corresponding value `b', and 0 otherwise.  The comparison is performed
        !          2188: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2189: -------------------------------------------------------------------------------
        !          2190: */
        !          2191: flag float32_lt( float32 a, float32 b )
        !          2192: {
        !          2193:     flag aSign, bSign;
        !          2194:
        !          2195:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2196:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2197:        ) {
        !          2198:         float_raise( float_flag_invalid );
        !          2199:         return 0;
        !          2200:     }
        !          2201:     aSign = extractFloat32Sign( a );
        !          2202:     bSign = extractFloat32Sign( b );
        !          2203:     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
        !          2204:     return ( a != b ) && ( aSign ^ ( a < b ) );
        !          2205:
        !          2206: }
        !          2207:
        !          2208: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          2209: /*
        !          2210: -------------------------------------------------------------------------------
        !          2211: Returns 1 if the single-precision floating-point value `a' is equal to
        !          2212: the corresponding value `b', and 0 otherwise.  The invalid exception is
        !          2213: raised if either operand is a NaN.  Otherwise, the comparison is performed
        !          2214: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2215: -------------------------------------------------------------------------------
        !          2216: */
        !          2217: flag float32_eq_signaling( float32 a, float32 b )
        !          2218: {
        !          2219:
        !          2220:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2221:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2222:        ) {
        !          2223:         float_raise( float_flag_invalid );
        !          2224:         return 0;
        !          2225:     }
        !          2226:     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
        !          2227:
        !          2228: }
        !          2229:
        !          2230: /*
        !          2231: -------------------------------------------------------------------------------
        !          2232: Returns 1 if the single-precision floating-point value `a' is less than or
        !          2233: equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
        !          2234: cause an exception.  Otherwise, the comparison is performed according to the
        !          2235: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2236: -------------------------------------------------------------------------------
        !          2237: */
        !          2238: flag float32_le_quiet( float32 a, float32 b )
        !          2239: {
        !          2240:     flag aSign, bSign;
        !          2241:
        !          2242:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2243:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2244:        ) {
        !          2245:         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
        !          2246:             float_raise( float_flag_invalid );
        !          2247:         }
        !          2248:         return 0;
        !          2249:     }
        !          2250:     aSign = extractFloat32Sign( a );
        !          2251:     bSign = extractFloat32Sign( b );
        !          2252:     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
        !          2253:     return ( a == b ) || ( aSign ^ ( a < b ) );
        !          2254:
        !          2255: }
        !          2256:
        !          2257: /*
        !          2258: -------------------------------------------------------------------------------
        !          2259: Returns 1 if the single-precision floating-point value `a' is less than
        !          2260: the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
        !          2261: exception.  Otherwise, the comparison is performed according to the IEC/IEEE
        !          2262: Standard for Binary Floating-Point Arithmetic.
        !          2263: -------------------------------------------------------------------------------
        !          2264: */
        !          2265: flag float32_lt_quiet( float32 a, float32 b )
        !          2266: {
        !          2267:     flag aSign, bSign;
        !          2268:
        !          2269:     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        !          2270:          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        !          2271:        ) {
        !          2272:         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
        !          2273:             float_raise( float_flag_invalid );
        !          2274:         }
        !          2275:         return 0;
        !          2276:     }
        !          2277:     aSign = extractFloat32Sign( a );
        !          2278:     bSign = extractFloat32Sign( b );
        !          2279:     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
        !          2280:     return ( a != b ) && ( aSign ^ ( a < b ) );
        !          2281:
        !          2282: }
        !          2283: #endif /* !SOFTFLOAT_FOR_GCC */
        !          2284:
        !          2285: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          2286: /*
        !          2287: -------------------------------------------------------------------------------
        !          2288: Returns the result of converting the double-precision floating-point value
        !          2289: `a' to the 32-bit two's complement integer format.  The conversion is
        !          2290: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2291: Arithmetic---which means in particular that the conversion is rounded
        !          2292: according to the current rounding mode.  If `a' is a NaN, the largest
        !          2293: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          2294: largest integer with the same sign as `a' is returned.
        !          2295: -------------------------------------------------------------------------------
        !          2296: */
        !          2297: int32 float64_to_int32( float64 a )
        !          2298: {
        !          2299:     flag aSign;
        !          2300:     int16 aExp, shiftCount;
        !          2301:     bits64 aSig;
        !          2302:
        !          2303:     aSig = extractFloat64Frac( a );
        !          2304:     aExp = extractFloat64Exp( a );
        !          2305:     aSign = extractFloat64Sign( a );
        !          2306:     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
        !          2307:     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
        !          2308:     shiftCount = 0x42C - aExp;
        !          2309:     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
        !          2310:     return roundAndPackInt32( aSign, aSig );
        !          2311:
        !          2312: }
        !          2313: #endif /* !SOFTFLOAT_FOR_GCC */
        !          2314:
        !          2315: /*
        !          2316: -------------------------------------------------------------------------------
        !          2317: Returns the result of converting the double-precision floating-point value
        !          2318: `a' to the 32-bit two's complement integer format.  The conversion is
        !          2319: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2320: Arithmetic, except that the conversion is always rounded toward zero.
        !          2321: If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
        !          2322: the conversion overflows, the largest integer with the same sign as `a' is
        !          2323: returned.
        !          2324: -------------------------------------------------------------------------------
        !          2325: */
        !          2326: int32 float64_to_int32_round_to_zero( float64 a )
        !          2327: {
        !          2328:     flag aSign;
        !          2329:     int16 aExp, shiftCount;
        !          2330:     bits64 aSig, savedASig;
        !          2331:     int32 z;
        !          2332:
        !          2333:     aSig = extractFloat64Frac( a );
        !          2334:     aExp = extractFloat64Exp( a );
        !          2335:     aSign = extractFloat64Sign( a );
        !          2336:     if ( 0x41E < aExp ) {
        !          2337:         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
        !          2338:         goto invalid;
        !          2339:     }
        !          2340:     else if ( aExp < 0x3FF ) {
        !          2341:         if ( aExp || aSig ) float_set_inexact();
        !          2342:         return 0;
        !          2343:     }
        !          2344:     aSig |= LIT64( 0x0010000000000000 );
        !          2345:     shiftCount = 0x433 - aExp;
        !          2346:     savedASig = aSig;
        !          2347:     aSig >>= shiftCount;
        !          2348:     z = aSig;
        !          2349:     if ( aSign ) z = - z;
        !          2350:     if ( ( z < 0 ) ^ aSign ) {
        !          2351:  invalid:
        !          2352:         float_raise( float_flag_invalid );
        !          2353:         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
        !          2354:     }
        !          2355:     if ( ( aSig<<shiftCount ) != savedASig ) {
        !          2356:         float_set_inexact();
        !          2357:     }
        !          2358:     return z;
        !          2359:
        !          2360: }
        !          2361:
        !          2362: #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
        !          2363: /*
        !          2364: -------------------------------------------------------------------------------
        !          2365: Returns the result of converting the double-precision floating-point value
        !          2366: `a' to the 64-bit two's complement integer format.  The conversion is
        !          2367: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2368: Arithmetic---which means in particular that the conversion is rounded
        !          2369: according to the current rounding mode.  If `a' is a NaN, the largest
        !          2370: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          2371: largest integer with the same sign as `a' is returned.
        !          2372: -------------------------------------------------------------------------------
        !          2373: */
        !          2374: int64 float64_to_int64( float64 a )
        !          2375: {
        !          2376:     flag aSign;
        !          2377:     int16 aExp, shiftCount;
        !          2378:     bits64 aSig, aSigExtra;
        !          2379:
        !          2380:     aSig = extractFloat64Frac( a );
        !          2381:     aExp = extractFloat64Exp( a );
        !          2382:     aSign = extractFloat64Sign( a );
        !          2383:     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
        !          2384:     shiftCount = 0x433 - aExp;
        !          2385:     if ( shiftCount <= 0 ) {
        !          2386:         if ( 0x43E < aExp ) {
        !          2387:             float_raise( float_flag_invalid );
        !          2388:             if (    ! aSign
        !          2389:                  || (    ( aExp == 0x7FF )
        !          2390:                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
        !          2391:                ) {
        !          2392:                 return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          2393:             }
        !          2394:             return (sbits64) LIT64( 0x8000000000000000 );
        !          2395:         }
        !          2396:         aSigExtra = 0;
        !          2397:         aSig <<= - shiftCount;
        !          2398:     }
        !          2399:     else {
        !          2400:         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
        !          2401:     }
        !          2402:     return roundAndPackInt64( aSign, aSig, aSigExtra );
        !          2403:
        !          2404: }
        !          2405:
        !          2406: /*
        !          2407: -------------------------------------------------------------------------------
        !          2408: Returns the result of converting the double-precision floating-point value
        !          2409: `a' to the 64-bit two's complement integer format.  The conversion is
        !          2410: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2411: Arithmetic, except that the conversion is always rounded toward zero.
        !          2412: If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
        !          2413: the conversion overflows, the largest integer with the same sign as `a' is
        !          2414: returned.
        !          2415: -------------------------------------------------------------------------------
        !          2416: */
        !          2417: int64 float64_to_int64_round_to_zero( float64 a )
        !          2418: {
        !          2419:     flag aSign;
        !          2420:     int16 aExp, shiftCount;
        !          2421:     bits64 aSig;
        !          2422:     int64 z;
        !          2423:
        !          2424:     aSig = extractFloat64Frac( a );
        !          2425:     aExp = extractFloat64Exp( a );
        !          2426:     aSign = extractFloat64Sign( a );
        !          2427:     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
        !          2428:     shiftCount = aExp - 0x433;
        !          2429:     if ( 0 <= shiftCount ) {
        !          2430:         if ( 0x43E <= aExp ) {
        !          2431:             if ( a != LIT64( 0xC3E0000000000000 ) ) {
        !          2432:                 float_raise( float_flag_invalid );
        !          2433:                 if (    ! aSign
        !          2434:                      || (    ( aExp == 0x7FF )
        !          2435:                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
        !          2436:                    ) {
        !          2437:                     return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          2438:                 }
        !          2439:             }
        !          2440:             return (sbits64) LIT64( 0x8000000000000000 );
        !          2441:         }
        !          2442:         z = aSig<<shiftCount;
        !          2443:     }
        !          2444:     else {
        !          2445:         if ( aExp < 0x3FE ) {
        !          2446:             if ( aExp | aSig ) float_set_inexact();
        !          2447:             return 0;
        !          2448:         }
        !          2449:         z = aSig>>( - shiftCount );
        !          2450:         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
        !          2451:             float_set_inexact();
        !          2452:         }
        !          2453:     }
        !          2454:     if ( aSign ) z = - z;
        !          2455:     return z;
        !          2456:
        !          2457: }
        !          2458: #endif /* !SOFTFLOAT_FOR_GCC */
        !          2459:
        !          2460: /*
        !          2461: -------------------------------------------------------------------------------
        !          2462: Returns the result of converting the double-precision floating-point value
        !          2463: `a' to the single-precision floating-point format.  The conversion is
        !          2464: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2465: Arithmetic.
        !          2466: -------------------------------------------------------------------------------
        !          2467: */
        !          2468: float32 float64_to_float32( float64 a )
        !          2469: {
        !          2470:     flag aSign;
        !          2471:     int16 aExp;
        !          2472:     bits64 aSig;
        !          2473:     bits32 zSig;
        !          2474:
        !          2475:     aSig = extractFloat64Frac( a );
        !          2476:     aExp = extractFloat64Exp( a );
        !          2477:     aSign = extractFloat64Sign( a );
        !          2478:     if ( aExp == 0x7FF ) {
        !          2479:         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
        !          2480:         return packFloat32( aSign, 0xFF, 0 );
        !          2481:     }
        !          2482:     shift64RightJamming( aSig, 22, &aSig );
        !          2483:     zSig = aSig;
        !          2484:     if ( aExp || zSig ) {
        !          2485:         zSig |= 0x40000000;
        !          2486:         aExp -= 0x381;
        !          2487:     }
        !          2488:     return roundAndPackFloat32( aSign, aExp, zSig );
        !          2489:
        !          2490: }
        !          2491:
        !          2492: #ifdef FLOATX80
        !          2493:
        !          2494: /*
        !          2495: -------------------------------------------------------------------------------
        !          2496: Returns the result of converting the double-precision floating-point value
        !          2497: `a' to the extended double-precision floating-point format.  The conversion
        !          2498: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2499: Arithmetic.
        !          2500: -------------------------------------------------------------------------------
        !          2501: */
        !          2502: floatx80 float64_to_floatx80( float64 a )
        !          2503: {
        !          2504:     flag aSign;
        !          2505:     int16 aExp;
        !          2506:     bits64 aSig;
        !          2507:
        !          2508:     aSig = extractFloat64Frac( a );
        !          2509:     aExp = extractFloat64Exp( a );
        !          2510:     aSign = extractFloat64Sign( a );
        !          2511:     if ( aExp == 0x7FF ) {
        !          2512:         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
        !          2513:         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        !          2514:     }
        !          2515:     if ( aExp == 0 ) {
        !          2516:         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        !          2517:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
        !          2518:     }
        !          2519:     return
        !          2520:         packFloatx80(
        !          2521:             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
        !          2522:
        !          2523: }
        !          2524:
        !          2525: #endif
        !          2526:
        !          2527: #ifdef FLOAT128
        !          2528:
        !          2529: /*
        !          2530: -------------------------------------------------------------------------------
        !          2531: Returns the result of converting the double-precision floating-point value
        !          2532: `a' to the quadruple-precision floating-point format.  The conversion is
        !          2533: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          2534: Arithmetic.
        !          2535: -------------------------------------------------------------------------------
        !          2536: */
        !          2537: float128 float64_to_float128( float64 a )
        !          2538: {
        !          2539:     flag aSign;
        !          2540:     int16 aExp;
        !          2541:     bits64 aSig, zSig0, zSig1;
        !          2542:
        !          2543:     aSig = extractFloat64Frac( a );
        !          2544:     aExp = extractFloat64Exp( a );
        !          2545:     aSign = extractFloat64Sign( a );
        !          2546:     if ( aExp == 0x7FF ) {
        !          2547:         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
        !          2548:         return packFloat128( aSign, 0x7FFF, 0, 0 );
        !          2549:     }
        !          2550:     if ( aExp == 0 ) {
        !          2551:         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
        !          2552:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
        !          2553:         --aExp;
        !          2554:     }
        !          2555:     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
        !          2556:     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
        !          2557:
        !          2558: }
        !          2559:
        !          2560: #endif
        !          2561:
        !          2562: #ifndef SOFTFLOAT_FOR_GCC
        !          2563: /*
        !          2564: -------------------------------------------------------------------------------
        !          2565: Rounds the double-precision floating-point value `a' to an integer, and
        !          2566: returns the result as a double-precision floating-point value.  The
        !          2567: operation is performed according to the IEC/IEEE Standard for Binary
        !          2568: Floating-Point Arithmetic.
        !          2569: -------------------------------------------------------------------------------
        !          2570: */
        !          2571: float64 float64_round_to_int( float64 a )
        !          2572: {
        !          2573:     flag aSign;
        !          2574:     int16 aExp;
        !          2575:     bits64 lastBitMask, roundBitsMask;
        !          2576:     int8 roundingMode;
        !          2577:     float64 z;
        !          2578:
        !          2579:     aExp = extractFloat64Exp( a );
        !          2580:     if ( 0x433 <= aExp ) {
        !          2581:         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
        !          2582:             return propagateFloat64NaN( a, a );
        !          2583:         }
        !          2584:         return a;
        !          2585:     }
        !          2586:     if ( aExp < 0x3FF ) {
        !          2587:         if ( (bits64) ( a<<1 ) == 0 ) return a;
        !          2588:         float_set_inexact();
        !          2589:         aSign = extractFloat64Sign( a );
        !          2590:         switch ( float_rounding_mode() ) {
        !          2591:          case float_round_nearest_even:
        !          2592:             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
        !          2593:                 return packFloat64( aSign, 0x3FF, 0 );
        !          2594:             }
        !          2595:             break;
        !          2596:          case float_round_down:
        !          2597:             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
        !          2598:          case float_round_up:
        !          2599:             return
        !          2600:             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
        !          2601:         }
        !          2602:         return packFloat64( aSign, 0, 0 );
        !          2603:     }
        !          2604:     lastBitMask = 1;
        !          2605:     lastBitMask <<= 0x433 - aExp;
        !          2606:     roundBitsMask = lastBitMask - 1;
        !          2607:     z = a;
        !          2608:     roundingMode = float_rounding_mode();
        !          2609:     if ( roundingMode == float_round_nearest_even ) {
        !          2610:         z += lastBitMask>>1;
        !          2611:         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
        !          2612:     }
        !          2613:     else if ( roundingMode != float_round_to_zero ) {
        !          2614:         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
        !          2615:             z += roundBitsMask;
        !          2616:         }
        !          2617:     }
        !          2618:     z &= ~ roundBitsMask;
        !          2619:     if ( z != a ) float_set_inexact();
        !          2620:     return z;
        !          2621:
        !          2622: }
        !          2623: #endif
        !          2624:
        !          2625: /*
        !          2626: -------------------------------------------------------------------------------
        !          2627: Returns the result of adding the absolute values of the double-precision
        !          2628: floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
        !          2629: before being returned.  `zSign' is ignored if the result is a NaN.
        !          2630: The addition is performed according to the IEC/IEEE Standard for Binary
        !          2631: Floating-Point Arithmetic.
                         Outline: both fraction fields are pre-shifted left by 9 bits, the
                         operand with the smaller exponent is shifted right to line up with
                         the other, the significands are added, and the sum is passed to
                         `roundAndPackFloat64' together with `zSign' and the result exponent.
        !          2632: -------------------------------------------------------------------------------
        !          2633: */
        !          2634: static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
        !          2635: {
        !          2636:     int16 aExp, bExp, zExp;
        !          2637:     bits64 aSig, bSig, zSig;
        !          2638:     int16 expDiff;
        !          2639:
        !          2640:     aSig = extractFloat64Frac( a );
        !          2641:     aExp = extractFloat64Exp( a );
        !          2642:     bSig = extractFloat64Frac( b );
        !          2643:     bExp = extractFloat64Exp( b );
        !          2644:     expDiff = aExp - bExp;
                             /* After this shift the implicit leading bit (bit 52 of the
                                encoding) corresponds to 0x2000000000000000. */
        !          2645:     aSig <<= 9;
        !          2646:     bSig <<= 9;
        !          2647:     if ( 0 < expDiff ) {
                                 /* `a' has the larger exponent. */
        !          2648:         if ( aExp == 0x7FF ) {
                                     /* `a' is a NaN (nonzero fraction) or an infinity. */
        !          2649:             if ( aSig ) return propagateFloat64NaN( a, b );
        !          2650:             return a;
        !          2651:         }
        !          2652:         if ( bExp == 0 ) {
                                     /* Zero exponent field: no implicit bit is added to
                                        bSig, and the alignment distance is one less. */
        !          2653:             --expDiff;
        !          2654:         }
        !          2655:         else {
        !          2656:             bSig |= LIT64( 0x2000000000000000 );
        !          2657:         }
                                 /* Align `b' with `a'.  NOTE(review): presumably
                                    shift64RightJamming folds the bits shifted out into the
                                    low bit for rounding -- confirm against its definition. */
        !          2658:         shift64RightJamming( bSig, expDiff, &bSig );
        !          2659:         zExp = aExp;
        !          2660:     }
        !          2661:     else if ( expDiff < 0 ) {
                                 /* `b' has the larger exponent; mirror image of the case
                                    above. */
        !          2662:         if ( bExp == 0x7FF ) {
        !          2663:             if ( bSig ) return propagateFloat64NaN( a, b );
                                     /* Infinity carrying the requested result sign. */
        !          2664:             return packFloat64( zSign, 0x7FF, 0 );
        !          2665:         }
        !          2666:         if ( aExp == 0 ) {
        !          2667:             ++expDiff;
        !          2668:         }
        !          2669:         else {
        !          2670:             aSig |= LIT64( 0x2000000000000000 );
        !          2671:         }
        !          2672:         shift64RightJamming( aSig, - expDiff, &aSig );
        !          2673:         zExp = bExp;
        !          2674:     }
        !          2675:     else {
                                 /* Equal exponents: no alignment is needed. */
        !          2676:         if ( aExp == 0x7FF ) {
        !          2677:             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
        !          2678:             return a;
        !          2679:         }
                                 /* Both exponent fields are zero: the fractions are added and
                                    packed directly, with no rounding step. */
        !          2680:         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
                                 /* 0x4000000000000000 is the sum of the two implicit bits
                                    (2 * 0x2000000000000000). */
        !          2681:         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
        !          2682:         zExp = aExp;
        !          2683:         goto roundAndPack;
        !          2684:     }
                             /* Shared tail for the two unequal-exponent cases.  Only the
                                smaller operand has received its implicit bit so far; this OR
                                supplies the larger operand's.  In the second case it lands in
                                aSig rather than bSig, which leaves the sum unchanged provided
                                bit 61 of aSig is clear after the alignment shift (TODO confirm
                                against shift64RightJamming). */
        !          2685:     aSig |= LIT64( 0x2000000000000000 );
                             /* Try the sum shifted up one place with the exponent lowered to
                                match; if that sets the top bit, fall back to the unshifted
                                sum and the original exponent. */
        !          2686:     zSig = ( aSig + bSig )<<1;
        !          2687:     --zExp;
        !          2688:     if ( (sbits64) zSig < 0 ) {
        !          2689:         zSig = aSig + bSig;
        !          2690:         ++zExp;
        !          2691:     }
        !          2692:  roundAndPack:
        !          2693:     return roundAndPackFloat64( zSign, zExp, zSig );
        !          2694:
        !          2695: }
        !          2696:
        !          2697: /*
        !          2698: -------------------------------------------------------------------------------
        !          2699: Returns the result of subtracting the absolute values of the double-
        !          2700: precision floating-point values `a' and `b'.  If `zSign' is 1, the
        !          2701: difference is negated before being returned.  `zSign' is ignored if the
        !          2702: result is a NaN.  The subtraction is performed according to the IEC/IEEE
        !          2703: Standard for Binary Floating-Point Arithmetic.
        !          2704: -------------------------------------------------------------------------------
        !          2705: */
        !          2706: static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
        !          2707: {
        !          2708:     int16 aExp, bExp, zExp;
        !          2709:     bits64 aSig, bSig, zSig;
        !          2710:     int16 expDiff;
        !          2711:
        !          2712:     aSig = extractFloat64Frac( a );
        !          2713:     aExp = extractFloat64Exp( a );
        !          2714:     bSig = extractFloat64Frac( b );
        !          2715:     bExp = extractFloat64Exp( b );
        !          2716:     expDiff = aExp - bExp;  /* its sign tells which operand has the larger exponent field */
        !          2717:     aSig <<= 10;  /* both fractions are scaled up by 10 bits */
        !          2718:     bSig <<= 10;
        !          2719:     if ( 0 < expDiff ) goto aExpBigger;
        !          2720:     if ( expDiff < 0 ) goto bExpBigger;
        !          2721:     if ( aExp == 0x7FF ) {  /* exponents are equal here, so both are 0x7FF */
        !          2722:         if ( aSig | bSig ) return propagateFloat64NaN( a, b );  /* a nonzero fraction means a NaN operand */
        !          2723:         float_raise( float_flag_invalid );  /* both fractions zero: infinity minus infinity */
        !          2724:         return float64_default_nan;
        !          2725:     }
        !          2726:     if ( aExp == 0 ) {  /* both exponent fields are zero: work with exponent 1 instead */
        !          2727:         aExp = 1;
        !          2728:         bExp = 1;
        !          2729:     }
        !          2730:     if ( bSig < aSig ) goto aBigger;
        !          2731:     if ( aSig < bSig ) goto bBigger;
        !          2732:     return packFloat64( float_rounding_mode() == float_round_down, 0, 0 );  /* equal magnitudes: zero, sign set only when rounding down */
        !          2733:  bExpBigger:
        !          2734:     if ( bExp == 0x7FF ) {
        !          2735:         if ( bSig ) return propagateFloat64NaN( a, b );
        !          2736:         return packFloat64( zSign ^ 1, 0x7FF, 0 );  /* b is infinite: result is infinity with the inverted sign */
        !          2737:     }
        !          2738:     if ( aExp == 0 ) {
        !          2739:         ++expDiff;  /* expDiff is negative here, so this shortens the shift below by one */
        !          2740:     }
        !          2741:     else {
        !          2742:         aSig |= LIT64( 0x4000000000000000 );  /* set bit 62, directly above the shifted fraction */
        !          2743:     }
        !          2744:     shift64RightJamming( aSig, - expDiff, &aSig );  /* align a to b's exponent; result written back to aSig */
        !          2745:     bSig |= LIT64( 0x4000000000000000 );
        !          2746:  bBigger:
        !          2747:     zSig = bSig - aSig;
        !          2748:     zExp = bExp;
        !          2749:     zSign ^= 1;  /* b has the larger magnitude, so the result sign flips */
        !          2750:     goto normalizeRoundAndPack;
        !          2751:  aExpBigger:
        !          2752:     if ( aExp == 0x7FF ) {
        !          2753:         if ( aSig ) return propagateFloat64NaN( a, b );
        !          2754:         return a;  /* a is infinite and b is not: a is returned unchanged */
        !          2755:     }
        !          2756:     if ( bExp == 0 ) {
        !          2757:         --expDiff;  /* expDiff is positive here, so this shortens the shift below by one */
        !          2758:     }
        !          2759:     else {
        !          2760:         bSig |= LIT64( 0x4000000000000000 );  /* set bit 62, directly above the shifted fraction */
        !          2761:     }
        !          2762:     shift64RightJamming( bSig, expDiff, &bSig );  /* align b to a's exponent; result written back to bSig */
        !          2763:     aSig |= LIT64( 0x4000000000000000 );
        !          2764:  aBigger:
        !          2765:     zSig = aSig - bSig;
        !          2766:     zExp = aExp;
        !          2767:  normalizeRoundAndPack:
        !          2768:     --zExp;  /* the exponent handed on is one below the larger operand's */
        !          2769:     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
        !          2770:
        !          2771: }
        !          2772:
        !          2773: /*
        !          2774: -------------------------------------------------------------------------------
        !          2775: Returns the result of adding the double-precision floating-point values `a'
        !          2776: and `b'.  The operation is performed according to the IEC/IEEE Standard for
        !          2777: Binary Floating-Point Arithmetic.
        !          2778: -------------------------------------------------------------------------------
        !          2779: */
        !          2780: float64 float64_add( float64 a, float64 b )
        !          2781: {
        !          2782:     flag signOfA = extractFloat64Sign( a );
        !          2783:     flag signOfB = extractFloat64Sign( b );
        !          2784:
        !          2785:     /*
        !          2786:      * Operands of unlike sign are handled as a difference of
        !          2787:      * magnitudes; either helper is handed the sign of `a'.
        !          2788:      */
        !          2789:     if ( signOfA != signOfB ) {
        !          2790:         return subFloat64Sigs( a, b, signOfA );
        !          2791:     }
        !          2792:     return addFloat64Sigs( a, b, signOfA );
        !          2793: }
        !          2794:
        !          2795: /*
        !          2796: -------------------------------------------------------------------------------
        !          2797: Returns the result of subtracting the double-precision floating-point values
        !          2798: `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
        !          2799: for Binary Floating-Point Arithmetic.
        !          2800: -------------------------------------------------------------------------------
        !          2801: */
        !          2802: float64 float64_sub( float64 a, float64 b )
        !          2803: {
        !          2804:     flag signOfA = extractFloat64Sign( a );
        !          2805:     flag signOfB = extractFloat64Sign( b );
        !          2806:
        !          2807:     /*
        !          2808:      * Subtracting an operand of unlike sign is handled as a sum of
        !          2809:      * magnitudes; either helper is handed the sign of `a'.
        !          2810:      */
        !          2811:     if ( signOfA != signOfB ) {
        !          2812:         return addFloat64Sigs( a, b, signOfA );
        !          2813:     }
        !          2814:     return subFloat64Sigs( a, b, signOfA );
        !          2815: }
        !          2816:
        !          2817: /*
        !          2818: -------------------------------------------------------------------------------
        !          2819: Returns the result of multiplying the double-precision floating-point values
        !          2820: `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
        !          2821: for Binary Floating-Point Arithmetic.
        !          2822: -------------------------------------------------------------------------------
        !          2823: */
        !          2824: float64 float64_mul( float64 a, float64 b )
        !          2825: {
        !          2826:     flag aSign, bSign, zSign;
        !          2827:     int16 aExp, bExp, zExp;
        !          2828:     bits64 aSig, bSig, zSig0, zSig1;
        !          2829:
        !          2830:     aSig = extractFloat64Frac( a );
        !          2831:     aExp = extractFloat64Exp( a );
        !          2832:     aSign = extractFloat64Sign( a );
        !          2833:     bSig = extractFloat64Frac( b );
        !          2834:     bExp = extractFloat64Exp( b );
        !          2835:     bSign = extractFloat64Sign( b );
        !          2836:     zSign = aSign ^ bSign;  /* result sign is set exactly when the operand signs differ */
        !          2837:     if ( aExp == 0x7FF ) {  /* a is infinite or a NaN */
        !          2838:         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {  /* either operand is a NaN */
        !          2839:             return propagateFloat64NaN( a, b );
        !          2840:         }
        !          2841:         if ( ( bExp | bSig ) == 0 ) {  /* infinity times zero is invalid */
        !          2842:             float_raise( float_flag_invalid );
        !          2843:             return float64_default_nan;
        !          2844:         }
        !          2845:         return packFloat64( zSign, 0x7FF, 0 );  /* infinity with the product sign */
        !          2846:     }
        !          2847:     if ( bExp == 0x7FF ) {  /* b is infinite or a NaN; a is finite here */
        !          2848:         if ( bSig ) return propagateFloat64NaN( a, b );
        !          2849:         if ( ( aExp | aSig ) == 0 ) {  /* zero times infinity is invalid */
        !          2850:             float_raise( float_flag_invalid );
        !          2851:             return float64_default_nan;
        !          2852:         }
        !          2853:         return packFloat64( zSign, 0x7FF, 0 );
        !          2854:     }
        !          2855:     if ( aExp == 0 ) {
        !          2856:         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );  /* a is zero: signed zero result */
        !          2857:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );  /* zero exponent, nonzero fraction: aExp and aSig are rewritten through the pointers */
        !          2858:     }
        !          2859:     if ( bExp == 0 ) {
        !          2860:         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );  /* b is zero: signed zero result */
        !          2861:         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
        !          2862:     }
        !          2863:     zExp = aExp + bExp - 0x3FF;
        !          2864:     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;  /* set bit 52 above the fraction, then shift up 10 */
        !          2865:     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;  /* same for b, but shifted up 11 */
        !          2866:     mul64To128( aSig, bSig, &zSig0, &zSig1 );  /* product delivered in zSig0/zSig1 (presumably high/low halves -- confirm against the helper) */
        !          2867:     zSig0 |= ( zSig1 != 0 );  /* any nonzero zSig1 is folded into bit 0 of zSig0 */
        !          2868:     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {  /* bit 62 of zSig0 is clear: shift up once and lower the exponent */
        !          2869:         zSig0 <<= 1;
        !          2870:         --zExp;
        !          2871:     }
        !          2872:     return roundAndPackFloat64( zSign, zExp, zSig0 );
        !          2873:
        !          2874: }
        !          2875:
        !          2876: /*
        !          2877: -------------------------------------------------------------------------------
        !          2878: Returns the result of dividing the double-precision floating-point value `a'
        !          2879: by the corresponding value `b'.  The operation is performed according to
        !          2880: the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2881: -------------------------------------------------------------------------------
        !          2882: */
        !          2883: float64 float64_div( float64 a, float64 b )
        !          2884: {
        !          2885:     flag aSign, bSign, zSign;
        !          2886:     int16 aExp, bExp, zExp;
        !          2887:     bits64 aSig, bSig, zSig;
        !          2888:     bits64 rem0, rem1;
        !          2889:     bits64 term0, term1;
        !          2890:
        !          2891:     aSig = extractFloat64Frac( a );
        !          2892:     aExp = extractFloat64Exp( a );
        !          2893:     aSign = extractFloat64Sign( a );
        !          2894:     bSig = extractFloat64Frac( b );
        !          2895:     bExp = extractFloat64Exp( b );
        !          2896:     bSign = extractFloat64Sign( b );
        !          2897:     zSign = aSign ^ bSign;  /* result sign is set exactly when the operand signs differ */
        !          2898:     if ( aExp == 0x7FF ) {  /* a is infinite or a NaN */
        !          2899:         if ( aSig ) return propagateFloat64NaN( a, b );
        !          2900:         if ( bExp == 0x7FF ) {
        !          2901:             if ( bSig ) return propagateFloat64NaN( a, b );
        !          2902:             float_raise( float_flag_invalid );  /* infinity divided by infinity */
        !          2903:             return float64_default_nan;
        !          2904:         }
        !          2905:         return packFloat64( zSign, 0x7FF, 0 );  /* infinity divided by a finite value */
        !          2906:     }
        !          2907:     if ( bExp == 0x7FF ) {
        !          2908:         if ( bSig ) return propagateFloat64NaN( a, b );
        !          2909:         return packFloat64( zSign, 0, 0 );  /* finite a divided by infinity: signed zero */
        !          2910:     }
        !          2911:     if ( bExp == 0 ) {
        !          2912:         if ( bSig == 0 ) {  /* divisor is zero */
        !          2913:             if ( ( aExp | aSig ) == 0 ) {  /* zero divided by zero is invalid */
        !          2914:                 float_raise( float_flag_invalid );
        !          2915:                 return float64_default_nan;
        !          2916:             }
        !          2917:             float_raise( float_flag_divbyzero );  /* nonzero finite a divided by zero */
        !          2918:             return packFloat64( zSign, 0x7FF, 0 );
        !          2919:         }
        !          2920:         normalizeFloat64Subnormal( bSig, &bExp, &bSig );  /* bExp and bSig are rewritten through the pointers */
        !          2921:     }
        !          2922:     if ( aExp == 0 ) {
        !          2923:         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );  /* zero dividend: signed zero */
        !          2924:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
        !          2925:     }
        !          2926:     zExp = aExp - bExp + 0x3FD;
        !          2927:     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;  /* set bit 52 above the fraction, then shift up 10 */
        !          2928:     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;  /* same for b, but shifted up 11 */
        !          2929:     if ( bSig <= ( aSig + aSig ) ) {  /* twice aSig reaches bSig: halve aSig and raise the exponent */
        !          2930:         aSig >>= 1;
        !          2931:         ++zExp;
        !          2932:     }
        !          2933:     zSig = estimateDiv128To64( aSig, 0, bSig );  /* the name suggests an approximate quotient; it is corrected below */
        !          2934:     if ( ( zSig & 0x1FF ) <= 2 ) {  /* the remainder is examined only when the low 9 bits are 0, 1 or 2 */
        !          2935:         mul64To128( bSig, zSig, &term0, &term1 );
        !          2936:         sub128( aSig, 0, term0, term1, &rem0, &rem1 );  /* remainder = (aSig, 0) - bSig * zSig */
        !          2937:         while ( (sbits64) rem0 < 0 ) {  /* negative remainder: step zSig down and add bSig back */
        !          2938:             --zSig;
        !          2939:             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
        !          2940:         }
        !          2941:         zSig |= ( rem1 != 0 );  /* bit 0 of zSig is set when rem1 is nonzero */
        !          2942:     }
        !          2943:     return roundAndPackFloat64( zSign, zExp, zSig );
        !          2944:
        !          2945: }
        !          2946:
        !          2947: #ifndef SOFTFLOAT_FOR_GCC
        !          2948: /*
        !          2949: -------------------------------------------------------------------------------
        !          2950: Returns the remainder of the double-precision floating-point value `a'
        !          2951: with respect to the corresponding value `b'.  The operation is performed
        !          2952: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          2953: -------------------------------------------------------------------------------
        !          2954: */
        !          2955: float64 float64_rem( float64 a, float64 b )
        !          2956: {
        !          2957:     flag aSign, bSign, zSign;
        !          2958:     int16 aExp, bExp, expDiff;
        !          2959:     bits64 aSig, bSig;
        !          2960:     bits64 q, alternateASig;
        !          2961:     sbits64 sigMean;
        !          2962:
        !          2963:     aSig = extractFloat64Frac( a );
        !          2964:     aExp = extractFloat64Exp( a );
        !          2965:     aSign = extractFloat64Sign( a );
        !          2966:     bSig = extractFloat64Frac( b );
        !          2967:     bExp = extractFloat64Exp( b );
        !          2968:     bSign = extractFloat64Sign( b );  /* bSign is assigned but not read again in this function */
        !          2969:     if ( aExp == 0x7FF ) {  /* a is infinite or a NaN */
        !          2970:         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {  /* either operand is a NaN */
        !          2971:             return propagateFloat64NaN( a, b );
        !          2972:         }
        !          2973:         float_raise( float_flag_invalid );  /* remainder of an infinite a is invalid */
        !          2974:         return float64_default_nan;
        !          2975:     }
        !          2976:     if ( bExp == 0x7FF ) {
        !          2977:         if ( bSig ) return propagateFloat64NaN( a, b );
        !          2978:         return a;  /* finite a with infinite b: a is returned unchanged */
        !          2979:     }
        !          2980:     if ( bExp == 0 ) {
        !          2981:         if ( bSig == 0 ) {  /* b is zero: invalid */
        !          2982:             float_raise( float_flag_invalid );
        !          2983:             return float64_default_nan;
        !          2984:         }
        !          2985:         normalizeFloat64Subnormal( bSig, &bExp, &bSig );  /* bExp and bSig are rewritten through the pointers */
        !          2986:     }
        !          2987:     if ( aExp == 0 ) {
        !          2988:         if ( aSig == 0 ) return a;  /* a is zero: returned unchanged */
        !          2989:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
        !          2990:     }
        !          2991:     expDiff = aExp - bExp;
        !          2992:     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;  /* set bit 52 above the fraction, then shift up 11 (bit 63) */
        !          2993:     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
        !          2994:     if ( expDiff < 0 ) {
        !          2995:         if ( expDiff < -1 ) return a;  /* b's exponent is at least two above a's: a is returned unchanged */
        !          2996:         aSig >>= 1;  /* expDiff is exactly -1 here */
        !          2997:     }
        !          2998:     q = ( bSig <= aSig );  /* q is 1 when bSig can be subtracted from aSig once, else 0 */
        !          2999:     if ( q ) aSig -= bSig;
        !          3000:     expDiff -= 64;
        !          3001:     while ( 0 < expDiff ) {  /* entered only when the exponent difference exceeds 64; each pass lowers it by 62 */
        !          3002:         q = estimateDiv128To64( aSig, 0, bSig );
        !          3003:         q = ( 2 < q ) ? q - 2 : 0;  /* lower the estimate by 2, clamping at zero */
        !          3004:         aSig = - ( ( bSig>>2 ) * q );
        !          3005:         expDiff -= 62;
        !          3006:     }
        !          3007:     expDiff += 64;  /* undo the 64 subtracted before the loop */
        !          3008:     if ( 0 < expDiff ) {
        !          3009:         q = estimateDiv128To64( aSig, 0, bSig );
        !          3010:         q = ( 2 < q ) ? q - 2 : 0;  /* lower the estimate by 2, clamping at zero */
        !          3011:         q >>= 64 - expDiff;
        !          3012:         bSig >>= 2;
        !          3013:         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        !          3014:     }
        !          3015:     else {
        !          3016:         aSig >>= 2;  /* both significands are scaled down by 4, matching the bSig>>=2 of the other branch */
        !          3017:         bSig >>= 2;
        !          3018:     }
        !          3019:     do {  /* subtract bSig, counting in q, until aSig becomes negative as a signed value */
        !          3020:         alternateASig = aSig;  /* value before the current subtraction */
        !          3021:         ++q;
        !          3022:         aSig -= bSig;
        !          3023:     } while ( 0 <= (sbits64) aSig );
        !          3024:     sigMean = aSig + alternateASig;  /* sum of the two candidate remainders, read as signed */
        !          3025:     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {  /* pick the earlier candidate; an exact tie is decided by the low bit of q */
        !          3026:         aSig = alternateASig;
        !          3027:     }
        !          3028:     zSign = ( (sbits64) aSig < 0 );
        !          3029:     if ( zSign ) aSig = - aSig;  /* keep the magnitude; zSign remembers that it was negative */
        !          3030:     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );  /* result uses b's exponent and a's sign, flipped when zSign is set */
        !          3031:
        !          3032: }
        !          3033:
        !          3034: /*
        !          3035: -------------------------------------------------------------------------------
        !          3036: Returns the square root of the double-precision floating-point value `a'.
        !          3037: The operation is performed according to the IEC/IEEE Standard for Binary
        !          3038: Floating-Point Arithmetic.
        !          3039: -------------------------------------------------------------------------------
        !          3040: */
        !          3041: float64 float64_sqrt( float64 a )
        !          3042: {
        !          3043:     flag aSign;
        !          3044:     int16 aExp, zExp;
        !          3045:     bits64 aSig, zSig, doubleZSig;
        !          3046:     bits64 rem0, rem1, term0, term1;
        !          3047:
        !          3048:     aSig = extractFloat64Frac( a );
        !          3049:     aExp = extractFloat64Exp( a );
        !          3050:     aSign = extractFloat64Sign( a );
        !          3051:     if ( aExp == 0x7FF ) {  /* a is infinite or a NaN */
        !          3052:         if ( aSig ) return propagateFloat64NaN( a, a );
        !          3053:         if ( ! aSign ) return a;  /* positive infinity is returned unchanged */
        !          3054:         float_raise( float_flag_invalid );  /* negative infinity is invalid */
        !          3055:         return float64_default_nan;
        !          3056:     }
        !          3057:     if ( aSign ) {
        !          3058:         if ( ( aExp | aSig ) == 0 ) return a;  /* negative zero is returned unchanged */
        !          3059:         float_raise( float_flag_invalid );  /* any other negative input is invalid */
        !          3060:         return float64_default_nan;
        !          3061:     }
        !          3062:     if ( aExp == 0 ) {
        !          3063:         if ( aSig == 0 ) return 0;  /* positive zero: a literal 0 is returned */
        !          3064:         normalizeFloat64Subnormal( aSig, &aExp, &aSig );  /* aExp and aSig are rewritten through the pointers */
        !          3065:     }
        !          3066:     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
        !          3067:     aSig |= LIT64( 0x0010000000000000 );  /* set bit 52 above the fraction */
        !          3068:     zSig = estimateSqrt32( aExp, aSig>>21 );  /* the name suggests a 32-bit root estimate; it is refined below */
        !          3069:     aSig <<= 9 - ( aExp & 1 );  /* shift up by 9, or by 8 when aExp is odd */
        !          3070:     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
        !          3071:     if ( ( zSig & 0x1FF ) <= 5 ) {  /* the remainder is examined only when the low 9 bits are 0 through 5 */
        !          3072:         doubleZSig = zSig<<1;
        !          3073:         mul64To128( zSig, zSig, &term0, &term1 );
        !          3074:         sub128( aSig, 0, term0, term1, &rem0, &rem1 );  /* remainder = (aSig, 0) - zSig squared */
        !          3075:         while ( (sbits64) rem0 < 0 ) {  /* negative remainder: step zSig down by one and adjust the remainder */
        !          3076:             --zSig;
        !          3077:             doubleZSig -= 2;
        !          3078:             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
        !          3079:         }
        !          3080:         zSig |= ( ( rem0 | rem1 ) != 0 );  /* bit 0 of zSig is set when any remainder is left */
        !          3081:     }
        !          3082:     return roundAndPackFloat64( 0, zExp, zSig );  /* the sign argument is always 0 here */
        !          3083:
        !          3084: }
        !          3085: #endif
        !          3086:
        !          3087: /*
        !          3088: -------------------------------------------------------------------------------
        !          3089: Returns 1 if the double-precision floating-point value `a' is equal to the
        !          3090: corresponding value `b', and 0 otherwise.  The comparison is performed
        !          3091: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3092: -------------------------------------------------------------------------------
        !          3093: */
        !          3094: flag float64_eq( float64 a, float64 b )
        !          3095: {
        !          3096:     /* Exponent 0x7FF together with a nonzero fraction marks a NaN operand. */
        !          3097:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3098:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3099:        ) {
        !          3100:         /* A NaN never compares equal; only a signaling one raises invalid. */
        !          3101:         if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
        !          3102:         else if ( float64_is_signaling_nan( b ) ) float_raise( float_flag_invalid );
        !          3103:         return 0;
        !          3104:     }
        !          3105:     if ( a == b ) return 1;
        !          3106:     /* Shifting out the top bit lets +0 and -0 compare equal. */
        !          3107:     return (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0;
        !          3108: }
        !          3109:
        !          3110: /*
        !          3111: -------------------------------------------------------------------------------
        !          3112: Returns 1 if the double-precision floating-point value `a' is less than or
        !          3113: equal to the corresponding value `b', and 0 otherwise.  The comparison is
        !          3114: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          3115: Arithmetic.
        !          3116: -------------------------------------------------------------------------------
        !          3117: */
        !          3118: flag float64_le( float64 a, float64 b )
        !          3119: {
        !          3120:     flag signOfA, signOfB;
        !          3121:
        !          3122:     /* Any NaN operand makes this comparison invalid. */
        !          3123:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3124:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3125:        ) {
        !          3126:         float_raise( float_flag_invalid );
        !          3127:         return 0;
        !          3128:     }
        !          3129:     signOfA = extractFloat64Sign( a );
        !          3130:     signOfB = extractFloat64Sign( b );
        !          3131:     if ( signOfA == signOfB ) {  /* like signs: the bit-pattern order is inverted for negatives */
        !          3132:         if ( a == b ) return 1;
        !          3133:         return ( signOfA ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ) != 0;
        !          3134:     }
        !          3135:     if ( signOfA ) return 1;  /* unlike signs with `a' negative */
        !          3136:     return (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0;  /* otherwise only +0 <= -0 holds */
        !          3137: }
        !          3138:
        !          3139: /*
        !          3140: -------------------------------------------------------------------------------
        !          3141: Returns 1 if the double-precision floating-point value `a' is less than
        !          3142: the corresponding value `b', and 0 otherwise.  The comparison is performed
        !          3143: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3144: -------------------------------------------------------------------------------
        !          3145: */
        !          3146: flag float64_lt( float64 a, float64 b )
        !          3147: {
        !          3148:     flag signOfA, signOfB;
        !          3149:
        !          3150:     /* Any NaN operand makes this comparison invalid. */
        !          3151:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3152:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3153:        ) {
        !          3154:         float_raise( float_flag_invalid );
        !          3155:         return 0;
        !          3156:     }
        !          3157:     signOfA = extractFloat64Sign( a );
        !          3158:     signOfB = extractFloat64Sign( b );
        !          3159:     if ( signOfA == signOfB ) {  /* like signs: the bit-pattern order is inverted for negatives */
        !          3160:         if ( a == b ) return 0;
        !          3161:         return ( signOfA ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ) != 0;
        !          3162:     }
        !          3163:     if ( ! signOfA ) return 0;  /* unlike signs with `a' non-negative */
        !          3164:     return (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 0;  /* -0 is not less than +0 */
        !          3165: }
        !          3166:
        !          3167: #ifndef SOFTFLOAT_FOR_GCC
        !          3168: /*
        !          3169: -------------------------------------------------------------------------------
        !          3170: Returns 1 if the double-precision floating-point value `a' is equal to the
        !          3171: corresponding value `b', and 0 otherwise.  The invalid exception is raised
        !          3172: if either operand is a NaN.  Otherwise, the comparison is performed
        !          3173: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3174: -------------------------------------------------------------------------------
        !          3175: */
        !          3176: flag float64_eq_signaling( float64 a, float64 b )
        !          3177: {
        !          3178:     /* Any NaN operand, quiet or signaling, raises the invalid exception here. */
        !          3179:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3180:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3181:        ) {
        !          3182:         float_raise( float_flag_invalid );
        !          3183:         return 0;
        !          3184:     }
        !          3185:     return ( a == b ) ||  /* the bit test below uses demangled values, as float64_eq does */
        !          3186:        ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );  /* shift drops the top bit: +0 equals -0 */
        !          3187: }
        !          3188:
        !          3189: /*
        !          3190: -------------------------------------------------------------------------------
        !          3191: Returns 1 if the double-precision floating-point value `a' is less than or
        !          3192: equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
        !          3193: cause an exception.  Otherwise, the comparison is performed according to the
        !          3194: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3195: -------------------------------------------------------------------------------
        !          3196: */
        !          3197: flag float64_le_quiet( float64 a, float64 b )
        !          3198: {
        !          3199:     flag aSign, bSign;
        !          3200:     /* Bit-level tests below use FLOAT64_DEMANGLE, matching float64_le. */
        !          3201:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3202:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3203:        ) {
        !          3204:         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {  /* a quiet NaN gives 0 without an exception */
        !          3205:             float_raise( float_flag_invalid );
        !          3206:         }
        !          3207:         return 0;
        !          3208:     }
        !          3209:     aSign = extractFloat64Sign( a );
        !          3210:     bSign = extractFloat64Sign( b );
        !          3211:     if ( aSign != bSign ) return aSign ||  /* unlike signs: true when `a' is negative or both are zero */
        !          3212:        ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
        !          3213:     return ( a == b ) || ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );  /* like signs: order is inverted for negatives */
        !          3214: }
        !          3215:
        !          3216: /*
        !          3217: -------------------------------------------------------------------------------
        !          3218: Returns 1 if the double-precision floating-point value `a' is less than
        !          3219: the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
        !          3220: exception.  Otherwise, the comparison is performed according to the IEC/IEEE
        !          3221: Standard for Binary Floating-Point Arithmetic.
        !          3222: -------------------------------------------------------------------------------
        !          3223: */
        !          3224: flag float64_lt_quiet( float64 a, float64 b )
        !          3225: {
        !          3226:     flag aSign, bSign;
        !          3227:     /* Bit-level tests below use FLOAT64_DEMANGLE, matching float64_lt. */
        !          3228:     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
        !          3229:          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        !          3230:        ) {
        !          3231:         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {  /* a quiet NaN gives 0 without an exception */
        !          3232:             float_raise( float_flag_invalid );
        !          3233:         }
        !          3234:         return 0;
        !          3235:     }
        !          3236:     aSign = extractFloat64Sign( a );
        !          3237:     bSign = extractFloat64Sign( b );
        !          3238:     if ( aSign != bSign ) return aSign &&  /* unlike signs: true when `a' is negative and not both are zero */
        !          3239:        ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 0 );
        !          3240:     return ( a != b ) && ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );  /* like signs: order is inverted for negatives */
        !          3241: }
        !          3242: #endif
        !          3243:
        !          3244: #ifdef FLOATX80
        !          3245:
        !          3246: /*
        !          3247: -------------------------------------------------------------------------------
        !          3248: Returns the result of converting the extended double-precision floating-
        !          3249: point value `a' to the 32-bit two's complement integer format.  The
        !          3250: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3251: Floating-Point Arithmetic---which means in particular that the conversion
        !          3252: is rounded according to the current rounding mode.  If `a' is a NaN, the
        !          3253: largest positive integer is returned.  Otherwise, if the conversion
        !          3254: overflows, the largest integer with the same sign as `a' is returned.
        !          3255: -------------------------------------------------------------------------------
        !          3256: */
        !          3257: int32 floatx80_to_int32( floatx80 a )
        !          3258: {
        !          3259:     bits64 sig = extractFloatx80Frac( a );
        !          3260:     int32 expField = extractFloatx80Exp( a );
        !          3261:     flag sign = extractFloatx80Sign( a );
        !          3262:     int32 shift = 0x4037 - expField;
        !          3263:
        !          3264:     /* Maximum exponent with bits set below the top one: the sign is dropped. */
        !          3265:     if ( expField == 0x7FFF ) {
        !          3266:         if ( (bits64) ( sig<<1 ) ) sign = 0;
        !          3267:     }
        !          3268:     /* The significand is always shifted right by at least one bit. */
        !          3269:     if ( shift < 1 ) shift = 1;
        !          3270:     shift64RightJamming( sig, shift, &sig );
        !          3271:     return roundAndPackInt32( sign, sig );
        !          3272: }
        !          3273:
        !          3274: /*
        !          3275: -------------------------------------------------------------------------------
        !          3276: Returns the result of converting the extended double-precision floating-
        !          3277: point value `a' to the 32-bit two's complement integer format.  The
        !          3278: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3279: Floating-Point Arithmetic, except that the conversion is always rounded
        !          3280: toward zero.  If `a' is a NaN, the largest positive integer is returned.
        !          3281: Otherwise, if the conversion overflows, the largest integer with the same
        !          3282: sign as `a' is returned.
        !          3283: -------------------------------------------------------------------------------
        !          3284: */
        !          3285: int32 floatx80_to_int32_round_to_zero( floatx80 a )
        !          3286: {
        !          3287:     flag aSign;
        !          3288:     int32 aExp, shiftCount;
        !          3289:     bits64 aSig, savedASig;
        !          3290:     int32 z;
        !          3291:     /* Split `a' into significand, exponent field and sign. */
        !          3292:     aSig = extractFloatx80Frac( a );
        !          3293:     aExp = extractFloatx80Exp( a );
        !          3294:     aSign = extractFloatx80Sign( a );
        !          3295:     if ( 0x401E < aExp ) { /* exponent too large for 32 bits (also Inf/NaN) */
        !          3296:         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; /* NaN: saturate to the positive limit */
        !          3297:         goto invalid;
        !          3298:     }
        !          3299:     else if ( aExp < 0x3FFF ) { /* exponent below the bias: result truncates to 0 */
        !          3300:         if ( aExp || aSig ) float_set_inexact(); /* inexact unless `a' is exactly zero */
        !          3301:         return 0;
        !          3302:     }
        !          3303:     shiftCount = 0x403E - aExp; /* 32..63 here, since 0x3FFF <= aExp <= 0x401E */
        !          3304:     savedASig = aSig; /* kept to detect discarded fraction bits below */
        !          3305:     aSig >>= shiftCount; /* truncate toward zero */
        !          3306:     z = aSig;
        !          3307:     if ( aSign ) z = - z;
        !          3308:     if ( ( z < 0 ) ^ aSign ) { /* sign of z disagrees with the input sign: overflow */
        !          3309:  invalid:
        !          3310:         float_raise( float_flag_invalid );
        !          3311:         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; /* saturate toward the input sign */
        !          3312:     }
        !          3313:     if ( ( aSig<<shiftCount ) != savedASig ) { /* nonzero bits were shifted out */
        !          3314:         float_set_inexact();
        !          3315:     }
        !          3316:     return z;
        !          3317:
        !          3318: }
        !          3319:
        !          3320: /*
        !          3321: -------------------------------------------------------------------------------
        !          3322: Returns the result of converting the extended double-precision floating-
        !          3323: point value `a' to the 64-bit two's complement integer format.  The
        !          3324: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3325: Floating-Point Arithmetic---which means in particular that the conversion
        !          3326: is rounded according to the current rounding mode.  If `a' is a NaN,
        !          3327: the largest positive integer is returned.  Otherwise, if the conversion
        !          3328: overflows, the largest integer with the same sign as `a' is returned.
        !          3329: -------------------------------------------------------------------------------
        !          3330: */
        !          3331: int64 floatx80_to_int64( floatx80 a )
        !          3332: {
        !          3333:     flag aSign;
        !          3334:     int32 aExp, shiftCount;
        !          3335:     bits64 aSig, aSigExtra;
        !          3336:     /* Split `a' into significand, exponent field and sign. */
        !          3337:     aSig = extractFloatx80Frac( a );
        !          3338:     aExp = extractFloatx80Exp( a );
        !          3339:     aSign = extractFloatx80Sign( a );
        !          3340:     shiftCount = 0x403E - aExp; /* right shift that turns aSig into the integer part */
        !          3341:     if ( shiftCount <= 0 ) {
        !          3342:         if ( shiftCount ) { /* aExp > 0x403E: too large, Inf or NaN */
        !          3343:             float_raise( float_flag_invalid );
        !          3344:             if (    ! aSign
        !          3345:                  || (    ( aExp == 0x7FFF )
        !          3346:                       && ( aSig != LIT64( 0x8000000000000000 ) ) ) /* max exponent with a non-infinity significand is treated as NaN */
        !          3347:                ) {
        !          3348:                 return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          3349:             }
        !          3350:             return (sbits64) LIT64( 0x8000000000000000 ); /* negative overflow or -Inf */
        !          3351:         }
        !          3352:         aSigExtra = 0; /* shiftCount == 0: no fraction bits to round */
        !          3353:     }
        !          3354:     else {
        !          3355:         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); /* presumably leaves the shifted-out bits in aSigExtra -- confirm against the helper */
        !          3356:     }
        !          3357:     return roundAndPackInt64( aSign, aSig, aSigExtra );
        !          3358:
        !          3359: }
        !          3360:
        !          3361: /*
        !          3362: -------------------------------------------------------------------------------
        !          3363: Returns the result of converting the extended double-precision floating-
        !          3364: point value `a' to the 64-bit two's complement integer format.  The
        !          3365: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3366: Floating-Point Arithmetic, except that the conversion is always rounded
        !          3367: toward zero.  If `a' is a NaN, the largest positive integer is returned.
        !          3368: Otherwise, if the conversion overflows, the largest integer with the same
        !          3369: sign as `a' is returned.
        !          3370: -------------------------------------------------------------------------------
        !          3371: */
        !          3372: int64 floatx80_to_int64_round_to_zero( floatx80 a )
        !          3373: {
        !          3374:     flag aSign;
        !          3375:     int32 aExp, shiftCount;
        !          3376:     bits64 aSig;
        !          3377:     int64 z;
        !          3378:     /* Split `a' into significand, exponent field and sign. */
        !          3379:     aSig = extractFloatx80Frac( a );
        !          3380:     aExp = extractFloatx80Exp( a );
        !          3381:     aSign = extractFloatx80Sign( a );
        !          3382:     shiftCount = aExp - 0x403E; /* negative for values that fit; 0 or more means too large */
        !          3383:     if ( 0 <= shiftCount ) {
        !          3384:         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); /* keep only the bits below the explicit integer bit */
        !          3385:         if ( ( a.high != 0xC03E ) || aSig ) { /* anything except sign=1, exp=0x403E, zero fraction is invalid */
        !          3386:             float_raise( float_flag_invalid );
        !          3387:             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { /* positive overflow or NaN */
        !          3388:                 return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          3389:             }
        !          3390:         }
        !          3391:         return (sbits64) LIT64( 0x8000000000000000 ); /* reached with or without the invalid flag raised above */
        !          3392:     }
        !          3393:     else if ( aExp < 0x3FFF ) { /* exponent below the bias: result truncates to 0 */
        !          3394:         if ( aExp | aSig ) float_set_inexact(); /* inexact unless `a' is exactly zero */
        !          3395:         return 0;
        !          3396:     }
        !          3397:     z = aSig>>( - shiftCount ); /* - shiftCount is 1..63 here */
        !          3398:     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { /* shiftCount & 63 == 64 - ( - shiftCount ): tests the bits just discarded */
        !          3399:         float_set_inexact();
        !          3400:     }
        !          3401:     if ( aSign ) z = - z;
        !          3402:     return z;
        !          3403:
        !          3404: }
        !          3405:
        !          3406: /*
        !          3407: -------------------------------------------------------------------------------
        !          3408: Returns the result of converting the extended double-precision floating-
        !          3409: point value `a' to the single-precision floating-point format.  The
        !          3410: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3411: Floating-Point Arithmetic.
        !          3412: -------------------------------------------------------------------------------
        !          3413: */
        !          3414: float32 floatx80_to_float32( floatx80 a )
        !          3415: {
        !          3416:     flag aSign;
        !          3417:     int32 aExp;
        !          3418:     bits64 aSig;
        !          3419:     /* Split `a' into significand, exponent field and sign. */
        !          3420:     aSig = extractFloatx80Frac( a );
        !          3421:     aExp = extractFloatx80Exp( a );
        !          3422:     aSign = extractFloatx80Sign( a );
        !          3423:     if ( aExp == 0x7FFF ) { /* Inf or NaN */
        !          3424:         if ( (bits64) ( aSig<<1 ) ) { /* any bit below the integer bit set: NaN */
        !          3425:             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
        !          3426:         }
        !          3427:         return packFloat32( aSign, 0xFF, 0 ); /* infinity keeps its sign */
        !          3428:     }
        !          3429:     shift64RightJamming( aSig, 33, &aSig ); /* narrow the 64-bit significand to 31 bits; "jamming" presumably folds lost bits into the LSB -- confirm */
        !          3430:     if ( aExp || aSig ) aExp -= 0x3F81; /* rebias the exponent unless the value is zero */
        !          3431:     return roundAndPackFloat32( aSign, aExp, aSig );
        !          3432:
        !          3433: }
        !          3434:
        !          3435: /*
        !          3436: -------------------------------------------------------------------------------
        !          3437: Returns the result of converting the extended double-precision floating-
        !          3438: point value `a' to the double-precision floating-point format.  The
        !          3439: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3440: Floating-Point Arithmetic.
        !          3441: -------------------------------------------------------------------------------
        !          3442: */
        !          3443: float64 floatx80_to_float64( floatx80 a )
        !          3444: {
        !          3445:     flag aSign;
        !          3446:     int32 aExp;
        !          3447:     bits64 aSig, zSig;
        !          3448:     /* Split `a' into significand, exponent field and sign. */
        !          3449:     aSig = extractFloatx80Frac( a );
        !          3450:     aExp = extractFloatx80Exp( a );
        !          3451:     aSign = extractFloatx80Sign( a );
        !          3452:     if ( aExp == 0x7FFF ) { /* Inf or NaN */
        !          3453:         if ( (bits64) ( aSig<<1 ) ) { /* any bit below the integer bit set: NaN */
        !          3454:             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
        !          3455:         }
        !          3456:         return packFloat64( aSign, 0x7FF, 0 ); /* infinity keeps its sign */
        !          3457:     }
        !          3458:     shift64RightJamming( aSig, 1, &zSig ); /* one-bit shift into zSig; aSig itself is left unchanged */
        !          3459:     if ( aExp || aSig ) aExp -= 0x3C01; /* rebias unless zero; tests the unshifted significand */
        !          3460:     return roundAndPackFloat64( aSign, aExp, zSig );
        !          3461:
        !          3462: }
        !          3463:
        !          3464: #ifdef FLOAT128
        !          3465:
        !          3466: /*
        !          3467: -------------------------------------------------------------------------------
        !          3468: Returns the result of converting the extended double-precision floating-
        !          3469: point value `a' to the quadruple-precision floating-point format.  The
        !          3470: conversion is performed according to the IEC/IEEE Standard for Binary
        !          3471: Floating-Point Arithmetic.
        !          3472: -------------------------------------------------------------------------------
        !          3473: */
        !          3474: float128 floatx80_to_float128( floatx80 a )
        !          3475: {
        !          3476:     flag aSign;
        !          3477:     int16 aExp;
        !          3478:     bits64 aSig, zSig0, zSig1;
        !          3479:     /* Split `a' into significand, exponent field and sign. */
        !          3480:     aSig = extractFloatx80Frac( a );
        !          3481:     aExp = extractFloatx80Exp( a );
        !          3482:     aSign = extractFloatx80Sign( a );
        !          3483:     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { /* NaN; infinity falls through to the normal path */
        !          3484:         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
        !          3485:     }
        !          3486:     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); /* aSig<<1 drops the explicit integer bit; the 16-bit shift presumably clears room for sign+exponent -- confirm */
        !          3487:     return packFloat128( aSign, aExp, zSig0, zSig1 ); /* exponent field is passed through unchanged */
        !          3488:
        !          3489: }
        !          3490:
        !          3491: #endif
        !          3492:
        !          3493: /*
        !          3494: -------------------------------------------------------------------------------
        !          3495: Rounds the extended double-precision floating-point value `a' to an integer,
        !          3496: and returns the result as an extended double-precision floating-point
        !          3497: value.  The operation is performed according to the IEC/IEEE Standard for
        !          3498: Binary Floating-Point Arithmetic.
        !          3499: -------------------------------------------------------------------------------
        !          3500: */
        !          3501: floatx80 floatx80_round_to_int( floatx80 a )
        !          3502: {
        !          3503:     flag aSign;
        !          3504:     int32 aExp;
        !          3505:     bits64 lastBitMask, roundBitsMask;
        !          3506:     int8 roundingMode;
        !          3507:     floatx80 z;
        !          3508:     /* Three ranges by exponent: no fraction bits, magnitude below 1, and the general case. */
        !          3509:     aExp = extractFloatx80Exp( a );
        !          3510:     if ( 0x403E <= aExp ) { /* significand has no fraction bits at this exponent (also Inf/NaN) */
        !          3511:         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
        !          3512:             return propagateFloatx80NaN( a, a );
        !          3513:         }
        !          3514:         return a;
        !          3515:     }
        !          3516:     if ( aExp < 0x3FFF ) { /* exponent below the bias: result is 0 or +/-1 */
        !          3517:         if (    ( aExp == 0 )
        !          3518:              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { /* zero (bit 63 of the significand is ignored by this test) */
        !          3519:             return a;
        !          3520:         }
        !          3521:         float_set_inexact(); /* every remaining value in this range changes when rounded */
        !          3522:         aSign = extractFloatx80Sign( a );
        !          3523:         switch ( float_rounding_mode() ) {
        !          3524:          case float_round_nearest_even:
        !          3525:             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) /* strictly above one half: rounds to +/-1 */
        !          3526:                ) {
        !          3527:                 return
        !          3528:                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
        !          3529:             }
        !          3530:             break; /* one half or less: falls to the signed zero below */
        !          3531:          case float_round_down:
        !          3532:             return
        !          3533:                   aSign ?
        !          3534:                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) /* negative: -1 */
        !          3535:                 : packFloatx80( 0, 0, 0 ); /* positive: +0 */
        !          3536:          case float_round_up:
        !          3537:             return
        !          3538:                   aSign ? packFloatx80( 1, 0, 0 ) /* negative: -0 */
        !          3539:                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); /* positive: +1 */
        !          3540:         }
        !          3541:         return packFloatx80( aSign, 0, 0 ); /* all other cases: zero with the input's sign */
        !          3542:     }
        !          3543:     lastBitMask = 1;
        !          3544:     lastBitMask <<= 0x403E - aExp; /* significand bit with weight 1 */
        !          3545:     roundBitsMask = lastBitMask - 1; /* all fraction bits below it */
        !          3546:     z = a;
        !          3547:     roundingMode = float_rounding_mode();
        !          3548:     if ( roundingMode == float_round_nearest_even ) {
        !          3549:         z.low += lastBitMask>>1; /* add one half */
        !          3550:         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; /* exact tie: force the last bit even */
        !          3551:     }
        !          3552:     else if ( roundingMode != float_round_to_zero ) {
        !          3553:         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { /* true for positive values under round_up, negative values otherwise */
        !          3554:             z.low += roundBitsMask; /* bump the magnitude if any fraction bit is set */
        !          3555:         }
        !          3556:     }
        !          3557:     z.low &= ~ roundBitsMask; /* discard the fraction bits */
        !          3558:     if ( z.low == 0 ) { /* the addition carried out of bit 63 */
        !          3559:         ++z.high;
        !          3560:         z.low = LIT64( 0x8000000000000000 );
        !          3561:     }
        !          3562:     if ( z.low != a.low ) float_set_inexact();
        !          3563:     return z;
        !          3564:
        !          3565: }
        !          3566:
        !          3567: /*
        !          3568: -------------------------------------------------------------------------------
        !          3569: Returns the result of adding the absolute values of the extended double-
        !          3570: precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
        !          3571: negated before being returned.  `zSign' is ignored if the result is a NaN.
        !          3572: The addition is performed according to the IEC/IEEE Standard for Binary
        !          3573: Floating-Point Arithmetic.
        !          3574: -------------------------------------------------------------------------------
        !          3575: */
        !          3576: static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
        !          3577: {
        !          3578:     int32 aExp, bExp, zExp;
        !          3579:     bits64 aSig, bSig, zSig0, zSig1;
        !          3580:     int32 expDiff;
        !          3581:     /* Align the operand with the smaller exponent, add, then renormalize. */
        !          3582:     aSig = extractFloatx80Frac( a );
        !          3583:     aExp = extractFloatx80Exp( a );
        !          3584:     bSig = extractFloatx80Frac( b );
        !          3585:     bExp = extractFloatx80Exp( b );
        !          3586:     expDiff = aExp - bExp;
        !          3587:     if ( 0 < expDiff ) { /* `a' has the larger exponent */
        !          3588:         if ( aExp == 0x7FFF ) {
        !          3589:             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3590:             return a; /* infinity plus a finite value */
        !          3591:         }
        !          3592:         if ( bExp == 0 ) --expDiff; /* exponent field 0 is aligned as if it were 1 */
        !          3593:         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        !          3594:         zExp = aExp;
        !          3595:     }
        !          3596:     else if ( expDiff < 0 ) { /* `b' has the larger exponent */
        !          3597:         if ( bExp == 0x7FFF ) {
        !          3598:             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3599:             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* infinity carrying zSign */
        !          3600:         }
        !          3601:         if ( aExp == 0 ) ++expDiff; /* exponent field 0 is aligned as if it were 1 */
        !          3602:         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        !          3603:         zExp = bExp;
        !          3604:     }
        !          3605:     else { /* equal exponent fields */
        !          3606:         if ( aExp == 0x7FFF ) {
        !          3607:             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
        !          3608:                 return propagateFloatx80NaN( a, b );
        !          3609:             }
        !          3610:             return a;
        !          3611:         }
        !          3612:         zSig1 = 0;
        !          3613:         zSig0 = aSig + bSig;
        !          3614:         if ( aExp == 0 ) { /* NOTE(review): zSig0 is 0 here when both operands are zero, and may wrap if bit 63 is set in an input */
        !          3615:             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); /* NOTE(review): confirm the helper tolerates a zero significand without an out-of-range shift */
        !          3616:             goto roundAndPack;
        !          3617:         }
        !          3618:         zExp = aExp;
        !          3619:         goto shiftRight1; /* assumes both integer bits are set, so the sum always carries -- confirm for unnormal inputs */
        !          3620:     }
        !          3621:     zSig0 = aSig + bSig;
        !          3622:     if ( (sbits64) zSig0 < 0 ) goto roundAndPack; /* bit 63 set: no renormalizing shift needed */
        !          3623:  shiftRight1:
        !          3624:     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
        !          3625:     zSig0 |= LIT64( 0x8000000000000000 ); /* put the carry back as the top bit */
        !          3626:     ++zExp;
        !          3627:  roundAndPack:
        !          3628:     return
        !          3629:         roundAndPackFloatx80(
        !          3630:             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
        !          3631:
        !          3632: }
        !          3633:
        !          3634: /*
        !          3635: -------------------------------------------------------------------------------
        !          3636: Returns the result of subtracting the absolute values of the extended
        !          3637: double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
        !          3638: difference is negated before being returned.  `zSign' is ignored if the
        !          3639: result is a NaN.  The subtraction is performed according to the IEC/IEEE
        !          3640: Standard for Binary Floating-Point Arithmetic.
        !          3641: -------------------------------------------------------------------------------
        !          3642: */
        !          3643: static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
        !          3644: {
        !          3645:     int32 aExp, bExp, zExp;
        !          3646:     bits64 aSig, bSig, zSig0, zSig1;
        !          3647:     int32 expDiff;
        !          3648:     floatx80 z;
        !          3649:     /* Subtract the smaller magnitude from the larger; zSign is flipped when `b' is the larger. */
        !          3650:     aSig = extractFloatx80Frac( a );
        !          3651:     aExp = extractFloatx80Exp( a );
        !          3652:     bSig = extractFloatx80Frac( b );
        !          3653:     bExp = extractFloatx80Exp( b );
        !          3654:     expDiff = aExp - bExp;
        !          3655:     if ( 0 < expDiff ) goto aExpBigger;
        !          3656:     if ( expDiff < 0 ) goto bExpBigger;
        !          3657:     if ( aExp == 0x7FFF ) { /* equal exponents, both Inf or NaN */
        !          3658:         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
        !          3659:             return propagateFloatx80NaN( a, b );
        !          3660:         }
        !          3661:         float_raise( float_flag_invalid ); /* infinity minus infinity */
        !          3662:         z.low = floatx80_default_nan_low;
        !          3663:         z.high = floatx80_default_nan_high;
        !          3664:         return z;
        !          3665:     }
        !          3666:     if ( aExp == 0 ) { /* exponent field 0 is handled as exponent 1 */
        !          3667:         aExp = 1;
        !          3668:         bExp = 1;
        !          3669:     }
        !          3670:     zSig1 = 0;
        !          3671:     if ( bSig < aSig ) goto aBigger;
        !          3672:     if ( aSig < bSig ) goto bBigger;
        !          3673:     return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 ); /* equal magnitudes: zero, negative only under round_down */
        !          3674:  bExpBigger:
        !          3675:     if ( bExp == 0x7FFF ) {
        !          3676:         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3677:         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* infinity with the opposite sign */
        !          3678:     }
        !          3679:     if ( aExp == 0 ) ++expDiff; /* exponent field 0 is aligned as if it were 1 */
        !          3680:     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        !          3681:  bBigger:
        !          3682:     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); /* 128-bit b - a */
        !          3683:     zExp = bExp;
        !          3684:     zSign ^= 1; /* `b' dominates, so the result sign flips */
        !          3685:     goto normalizeRoundAndPack;
        !          3686:  aExpBigger:
        !          3687:     if ( aExp == 0x7FFF ) {
        !          3688:         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3689:         return a;
        !          3690:     }
        !          3691:     if ( bExp == 0 ) --expDiff; /* exponent field 0 is aligned as if it were 1 */
        !          3692:     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        !          3693:  aBigger:
        !          3694:     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); /* 128-bit a - b */
        !          3695:     zExp = aExp;
        !          3696:  normalizeRoundAndPack:
        !          3697:     return
        !          3698:         normalizeRoundAndPackFloatx80(
        !          3699:             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
        !          3700:
        !          3701: }
        !          3702:
        !          3703: /*
        !          3704: -------------------------------------------------------------------------------
        !          3705: Returns the result of adding the extended double-precision floating-point
        !          3706: values `a' and `b'.  The operation is performed according to the IEC/IEEE
        !          3707: Standard for Binary Floating-Point Arithmetic.
        !          3708: -------------------------------------------------------------------------------
        !          3709: */
        !          3710: floatx80 floatx80_add( floatx80 a, floatx80 b )
        !          3711: {
        !          3712:     flag aSign, bSign;
        !          3713:     /* Dispatch on the operand signs; the sign of `a' is passed on as zSign. */
        !          3714:     aSign = extractFloatx80Sign( a );
        !          3715:     bSign = extractFloatx80Sign( b );
        !          3716:     if ( aSign == bSign ) { /* same sign: add magnitudes */
        !          3717:         return addFloatx80Sigs( a, b, aSign );
        !          3718:     }
        !          3719:     else { /* opposite signs: subtract magnitudes */
        !          3720:         return subFloatx80Sigs( a, b, aSign );
        !          3721:     }
        !          3722:
        !          3723: }
        !          3724:
        !          3725: /*
        !          3726: -------------------------------------------------------------------------------
        !          3727: Returns the result of subtracting the extended double-precision floating-
        !          3728: point values `a' and `b'.  The operation is performed according to the
        !          3729: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3730: -------------------------------------------------------------------------------
        !          3731: */
        !          3732: floatx80 floatx80_sub( floatx80 a, floatx80 b )
        !          3733: {
        !          3734:     flag aSign, bSign;
        !          3735:     /* Dispatch on the operand signs; the sign of `a' is passed on as zSign. */
        !          3736:     aSign = extractFloatx80Sign( a );
        !          3737:     bSign = extractFloatx80Sign( b );
        !          3738:     if ( aSign == bSign ) { /* same sign: subtract magnitudes */
        !          3739:         return subFloatx80Sigs( a, b, aSign );
        !          3740:     }
        !          3741:     else { /* opposite signs: add magnitudes */
        !          3742:         return addFloatx80Sigs( a, b, aSign );
        !          3743:     }
        !          3744:
        !          3745: }
        !          3746:
        !          3747: /*
        !          3748: -------------------------------------------------------------------------------
        !          3749: Returns the result of multiplying the extended double-precision floating-
        !          3750: point values `a' and `b'.  The operation is performed according to the
        !          3751: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3752: -------------------------------------------------------------------------------
        !          3753: */
        !          3754: floatx80 floatx80_mul( floatx80 a, floatx80 b )
        !          3755: {
        !          3756:     flag aSign, bSign, zSign;
        !          3757:     int32 aExp, bExp, zExp;
        !          3758:     bits64 aSig, bSig, zSig0, zSig1;
        !          3759:     floatx80 z;
        !          3760:     /* Handle NaN/Inf/zero operands first, then form the 128-bit product and round. */
        !          3761:     aSig = extractFloatx80Frac( a );
        !          3762:     aExp = extractFloatx80Exp( a );
        !          3763:     aSign = extractFloatx80Sign( a );
        !          3764:     bSig = extractFloatx80Frac( b );
        !          3765:     bExp = extractFloatx80Exp( b );
        !          3766:     bSign = extractFloatx80Sign( b );
        !          3767:     zSign = aSign ^ bSign; /* product sign */
        !          3768:     if ( aExp == 0x7FFF ) { /* `a' is Inf or NaN */
        !          3769:         if (    (bits64) ( aSig<<1 )
        !          3770:              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { /* either operand is a NaN */
        !          3771:             return propagateFloatx80NaN( a, b );
        !          3772:         }
        !          3773:         if ( ( bExp | bSig ) == 0 ) goto invalid; /* infinity times zero */
        !          3774:         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        !          3775:     }
        !          3776:     if ( bExp == 0x7FFF ) { /* `b' is Inf or NaN, `a' is finite */
        !          3777:         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3778:         if ( ( aExp | aSig ) == 0 ) { /* zero times infinity */
        !          3779:  invalid:
        !          3780:             float_raise( float_flag_invalid );
        !          3781:             z.low = floatx80_default_nan_low;
        !          3782:             z.high = floatx80_default_nan_high;
        !          3783:             return z;
        !          3784:         }
        !          3785:         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        !          3786:     }
        !          3787:     if ( aExp == 0 ) {
        !          3788:         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); /* zero operand: signed zero result */
        !          3789:         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
        !          3790:     }
        !          3791:     if ( bExp == 0 ) {
        !          3792:         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); /* zero operand: signed zero result */
        !          3793:         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
        !          3794:     }
        !          3795:     zExp = aExp + bExp - 0x3FFE; /* sum of the biased exponents, rebiased */
        !          3796:     mul64To128( aSig, bSig, &zSig0, &zSig1 );
        !          3797:     if ( 0 < (sbits64) zSig0 ) { /* bit 63 clear and high word nonzero: shift left one place */
        !          3798:         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        !          3799:         --zExp;
        !          3800:     }
        !          3801:     return
        !          3802:         roundAndPackFloatx80(
        !          3803:             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
        !          3804:
        !          3805: }
        !          3806:
        !          3807: /*
        !          3808: -------------------------------------------------------------------------------
        !          3809: Returns the result of dividing the extended double-precision floating-point
        !          3810: value `a' by the corresponding value `b'.  The operation is performed
        !          3811: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3812: -------------------------------------------------------------------------------
        !          3813: */
        !          3814: floatx80 floatx80_div( floatx80 a, floatx80 b )   /* a / b: special operands are handled first, then a two-word quotient is built */
        !          3815: {
        !          3816:     flag aSign, bSign, zSign;
        !          3817:     int32 aExp, bExp, zExp;
        !          3818:     bits64 aSig, bSig, zSig0, zSig1;
        !          3819:     bits64 rem0, rem1, rem2, term0, term1, term2;
        !          3820:     floatx80 z;
        !          3821:
        !          3822:     aSig = extractFloatx80Frac( a );
        !          3823:     aExp = extractFloatx80Exp( a );
        !          3824:     aSign = extractFloatx80Sign( a );
        !          3825:     bSig = extractFloatx80Frac( b );
        !          3826:     bExp = extractFloatx80Exp( b );
        !          3827:     bSign = extractFloatx80Sign( b );
        !          3828:     zSign = aSign ^ bSign;   /* quotient sign is set only when the operand signs differ */
        !          3829:     if ( aExp == 0x7FFF ) {   /* a has the maximum exponent: infinity or NaN */
        !          3830:         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );   /* a bit below bit 63 is set: a is a NaN */
        !          3831:         if ( bExp == 0x7FFF ) {
        !          3832:             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3833:             goto invalid;   /* infinity / infinity */
        !          3834:         }
        !          3835:         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );   /* infinity / finite: infinity with the quotient sign */
        !          3836:     }
        !          3837:     if ( bExp == 0x7FFF ) {   /* a is finite from here on; b is infinity or NaN */
        !          3838:         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3839:         return packFloatx80( zSign, 0, 0 );   /* finite / infinity: zero with the quotient sign */
        !          3840:     }
        !          3841:     if ( bExp == 0 ) {
        !          3842:         if ( bSig == 0 ) {   /* b is zero */
        !          3843:             if ( ( aExp | aSig ) == 0 ) {   /* a is zero as well: 0 / 0 */
        !          3844:  invalid:   /* shared exit for infinity/infinity and 0/0: raise invalid, return the default NaN */
        !          3845:                 float_raise( float_flag_invalid );
        !          3846:                 z.low = floatx80_default_nan_low;
        !          3847:                 z.high = floatx80_default_nan_high;
        !          3848:                 return z;
        !          3849:             }
        !          3850:             float_raise( float_flag_divbyzero );   /* nonzero finite / 0 */
        !          3851:             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        !          3852:         }
        !          3853:         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );   /* NOTE(review): presumably rewrites bExp/bSig in normalized form -- confirm in the helper */
        !          3854:     }
        !          3855:     if ( aExp == 0 ) {
        !          3856:         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );   /* 0 / nonzero: zero with the quotient sign */
        !          3857:         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
        !          3858:     }
        !          3859:     zExp = aExp - bExp + 0x3FFE;   /* exponent later handed to roundAndPackFloatx80 (may be bumped just below) */
        !          3860:     rem1 = 0;
        !          3861:     if ( bSig <= aSig ) {   /* dividend significand not below the divisor's: move it down one bit and compensate in zExp */
        !          3862:         shift128Right( aSig, 0, 1, &aSig, &rem1 );
        !          3863:         ++zExp;
        !          3864:     }
        !          3865:     zSig0 = estimateDiv128To64( aSig, rem1, bSig );   /* estimate of the high quotient word */
        !          3866:     mul64To128( bSig, zSig0, &term0, &term1 );
        !          3867:     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
        !          3868:     while ( (sbits64) rem0 < 0 ) {   /* negative remainder: lower the estimate and add bSig back */
        !          3869:         --zSig0;
        !          3870:         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
        !          3871:     }
        !          3872:     zSig1 = estimateDiv128To64( rem1, 0, bSig );   /* estimate of the low quotient word from what is left */
        !          3873:     if ( (bits64) ( zSig1<<1 ) <= 8 ) {   /* exact remainder is computed only when bits 0..62 of the estimate are <= 4 */
        !          3874:         mul64To128( bSig, zSig1, &term1, &term2 );
        !          3875:         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        !          3876:         while ( (sbits64) rem1 < 0 ) {
        !          3877:             --zSig1;
        !          3878:             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        !          3879:         }
        !          3880:         zSig1 |= ( ( rem1 | rem2 ) != 0 );   /* fold "remainder is nonzero" into the lowest bit */
        !          3881:     }
        !          3882:     return
        !          3883:         roundAndPackFloatx80(
        !          3884:             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
        !          3885:
        !          3886: }
        !          3887:
        !          3888: /*
        !          3889: -------------------------------------------------------------------------------
        !          3890: Returns the remainder of the extended double-precision floating-point value
        !          3891: `a' with respect to the corresponding value `b'.  The operation is performed
        !          3892: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          3893: -------------------------------------------------------------------------------
        !          3894: */
        !          3895: floatx80 floatx80_rem( floatx80 a, floatx80 b )   /* remainder of a with respect to b; the result sign starts as a's and may be flipped at the end */
        !          3896: {
        !          3897:     flag aSign, bSign, zSign;
        !          3898:     int32 aExp, bExp, expDiff;
        !          3899:     bits64 aSig0, aSig1, bSig;
        !          3900:     bits64 q, term0, term1, alternateASig0, alternateASig1;
        !          3901:     floatx80 z;
        !          3902:
        !          3903:     aSig0 = extractFloatx80Frac( a );
        !          3904:     aExp = extractFloatx80Exp( a );
        !          3905:     aSign = extractFloatx80Sign( a );
        !          3906:     bSig = extractFloatx80Frac( b );
        !          3907:     bExp = extractFloatx80Exp( b );
        !          3908:     bSign = extractFloatx80Sign( b );
        !          3909:     if ( aExp == 0x7FFF ) {   /* a is infinity or NaN */
        !          3910:         if (    (bits64) ( aSig0<<1 )
        !          3911:              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
        !          3912:             return propagateFloatx80NaN( a, b );   /* a is a NaN, or b is a NaN */
        !          3913:         }
        !          3914:         goto invalid;   /* a is infinity and b is not a NaN */
        !          3915:     }
        !          3916:     if ( bExp == 0x7FFF ) {   /* a is finite from here on; b is infinity or NaN */
        !          3917:         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        !          3918:         return a;   /* b is infinity: a is returned unchanged */
        !          3919:     }
        !          3920:     if ( bExp == 0 ) {
        !          3921:         if ( bSig == 0 ) {   /* b is zero */
        !          3922:  invalid:   /* infinite a or zero b: raise invalid, return the default NaN */
        !          3923:             float_raise( float_flag_invalid );
        !          3924:             z.low = floatx80_default_nan_low;
        !          3925:             z.high = floatx80_default_nan_high;
        !          3926:             return z;
        !          3927:         }
        !          3928:         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
        !          3929:     }
        !          3930:     if ( aExp == 0 ) {
        !          3931:         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;   /* exponent 0 and no bits below bit 63: a is returned unchanged */
        !          3932:         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
        !          3933:     }
        !          3934:     bSig |= LIT64( 0x8000000000000000 );   /* force bit 63 of the divisor significand */
        !          3935:     zSign = aSign;
        !          3936:     expDiff = aExp - bExp;
        !          3937:     aSig1 = 0;
        !          3938:     if ( expDiff < 0 ) {
        !          3939:         if ( expDiff < -1 ) return a;   /* a's exponent is more than one below b's: a is returned unchanged */
        !          3940:         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );   /* expDiff == -1: move a's significand down one bit instead */
        !          3941:         expDiff = 0;
        !          3942:     }
        !          3943:     q = ( bSig <= aSig0 );   /* first quotient bit */
        !          3944:     if ( q ) aSig0 -= bSig;
        !          3945:     expDiff -= 64;
        !          3946:     while ( 0 < expDiff ) {   /* each pass consumes 62 bits of exponent difference */
        !          3947:         q = estimateDiv128To64( aSig0, aSig1, bSig );
        !          3948:         q = ( 2 < q ) ? q - 2 : 0;   /* back the estimate off by 2, clamped at 0; NOTE(review): presumably so it never exceeds the true quotient -- confirm */
        !          3949:         mul64To128( bSig, q, &term0, &term1 );
        !          3950:         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        !          3951:         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        !          3952:         expDiff -= 62;
        !          3953:     }
        !          3954:     expDiff += 64;
        !          3955:     if ( 0 < expDiff ) {   /* last step, for the expDiff bits still outstanding */
        !          3956:         q = estimateDiv128To64( aSig0, aSig1, bSig );
        !          3957:         q = ( 2 < q ) ? q - 2 : 0;
        !          3958:         q >>= 64 - expDiff;   /* keep only the top expDiff bits of the estimate */
        !          3959:         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        !          3960:         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        !          3961:         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );   /* term0:term1 = divisor shifted left by 64 - expDiff */
        !          3962:         while ( le128( term0, term1, aSig0, aSig1 ) ) {   /* NOTE(review): le128 presumably tests term <= remainder; each hit takes one more quotient step */
        !          3963:             ++q;
        !          3964:             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        !          3965:         }
        !          3966:     }
        !          3967:     else {   /* no final step: the alternate below is formed against bSig itself */
        !          3968:         term1 = 0;
        !          3969:         term0 = bSig;
        !          3970:     }
        !          3971:     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );   /* alternate = term - remainder */
        !          3972:     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
        !          3973:          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
        !          3974:               && ( q & 1 ) )
        !          3975:        ) {   /* alternate is smaller, or equal with q odd: use it and flip the sign */
        !          3976:         aSig0 = alternateASig0;
        !          3977:         aSig1 = alternateASig1;
        !          3978:         zSign = ! zSign;
        !          3979:     }
        !          3980:     return
        !          3981:         normalizeRoundAndPackFloatx80(
        !          3982:             80, zSign, bExp + expDiff, aSig0, aSig1 );
        !          3983:
        !          3984: }
        !          3985:
        !          3986: /*
        !          3987: -------------------------------------------------------------------------------
        !          3988: Returns the square root of the extended double-precision floating-point
        !          3989: value `a'.  The operation is performed according to the IEC/IEEE Standard
        !          3990: for Binary Floating-Point Arithmetic.
        !          3991: -------------------------------------------------------------------------------
        !          3992: */
        !          3993: floatx80 floatx80_sqrt( floatx80 a )   /* square root of a; a negative, nonzero, non-NaN operand raises invalid */
        !          3994: {
        !          3995:     flag aSign;
        !          3996:     int32 aExp, zExp;
        !          3997:     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
        !          3998:     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
        !          3999:     floatx80 z;
        !          4000:
        !          4001:     aSig0 = extractFloatx80Frac( a );
        !          4002:     aExp = extractFloatx80Exp( a );
        !          4003:     aSign = extractFloatx80Sign( a );
        !          4004:     if ( aExp == 0x7FFF ) {   /* infinity or NaN */
        !          4005:         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );   /* NaN operand */
        !          4006:         if ( ! aSign ) return a;   /* positive infinity is returned unchanged */
        !          4007:         goto invalid;   /* negative infinity */
        !          4008:     }
        !          4009:     if ( aSign ) {
        !          4010:         if ( ( aExp | aSig0 ) == 0 ) return a;   /* negative zero is returned unchanged */
        !          4011:  invalid:   /* negative operand: raise invalid, return the default NaN */
        !          4012:         float_raise( float_flag_invalid );
        !          4013:         z.low = floatx80_default_nan_low;
        !          4014:         z.high = floatx80_default_nan_high;
        !          4015:         return z;
        !          4016:     }
        !          4017:     if ( aExp == 0 ) {
        !          4018:         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );   /* positive zero */
        !          4019:         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
        !          4020:     }
        !          4021:     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;   /* halve the exponent's offset from 0x3FFF */
        !          4022:     zSig0 = estimateSqrt32( aExp, aSig0>>32 );   /* first estimate, from the top 32 significand bits */
        !          4023:     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );   /* shift count is 2 or 3 depending on the exponent's low bit */
        !          4024:     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );   /* second estimate, built from a division by the first one */
        !          4025:     doubleZSig0 = zSig0<<1;
        !          4026:     mul64To128( zSig0, zSig0, &term0, &term1 );
        !          4027:     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
        !          4028:     while ( (sbits64) rem0 < 0 ) {   /* negative remainder: lower the estimate and restore the remainder */
        !          4029:         --zSig0;
        !          4030:         doubleZSig0 -= 2;
        !          4031:         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
        !          4032:     }
        !          4033:     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );   /* estimate of the low result word */
        !          4034:     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {   /* exact remainder is computed only when the low 62 bits of the estimate are <= 5 */
        !          4035:         if ( zSig1 == 0 ) zSig1 = 1;
        !          4036:         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        !          4037:         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        !          4038:         mul64To128( zSig1, zSig1, &term2, &term3 );
        !          4039:         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        !          4040:         while ( (sbits64) rem1 < 0 ) {   /* step zSig1 down until the three-word remainder is no longer negative */
        !          4041:             --zSig1;
        !          4042:             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
        !          4043:             term3 |= 1;
        !          4044:             term2 |= doubleZSig0;
        !          4045:             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        !          4046:         }
        !          4047:         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );   /* fold "remainder is nonzero" into the lowest bit */
        !          4048:     }
        !          4049:     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );   /* reassemble: zSig1 moved up one bit ... */
        !          4050:     zSig0 |= doubleZSig0;   /* ... with doubleZSig0 ORed into the high word */
        !          4051:     return
        !          4052:         roundAndPackFloatx80(
        !          4053:             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );   /* sign argument is always 0 on this path */
        !          4054:
        !          4055: }
        !          4056:
        !          4057: /*
        !          4058: -------------------------------------------------------------------------------
        !          4059: Returns 1 if the extended double-precision floating-point value `a' is
        !          4060: equal to the corresponding value `b', and 0 otherwise.  The comparison is
        !          4061: performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          4062: Arithmetic.
        !          4063: -------------------------------------------------------------------------------
        !          4064: */
        !          4065: flag floatx80_eq( floatx80 a, floatx80 b )   /* equality test; a NaN operand gives 0 and only a signaling NaN raises invalid */
        !          4066: {
        !          4067:
        !          4068:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4069:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4070:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4071:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4072:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4073:         if (    floatx80_is_signaling_nan( a )
        !          4074:              || floatx80_is_signaling_nan( b ) ) {
        !          4075:             float_raise( float_flag_invalid );
        !          4076:         }
        !          4077:         return 0;
        !          4078:     }
        !          4079:     return
        !          4080:            ( a.low == b.low )   /* low words must match ... */
        !          4081:         && (    ( a.high == b.high )   /* ... and so must the high words, */
        !          4082:              || (    ( a.low == 0 )   /* unless both operands are zeros: low word 0 and */
        !          4083:                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )   /* both high words 0 once the top (sign) bit is shifted out */
        !          4084:            );
        !          4085:
        !          4086: }
        !          4087:
        !          4088: /*
        !          4089: -------------------------------------------------------------------------------
        !          4090: Returns 1 if the extended double-precision floating-point value `a' is
        !          4091: less than or equal to the corresponding value `b', and 0 otherwise.  The
        !          4092: comparison is performed according to the IEC/IEEE Standard for Binary
        !          4093: Floating-Point Arithmetic.
        !          4094: -------------------------------------------------------------------------------
        !          4095: */
        !          4096: flag floatx80_le( floatx80 a, floatx80 b )   /* a <= b; any NaN operand raises invalid and gives 0 */
        !          4097: {
        !          4098:     flag aSign, bSign;
        !          4099:
        !          4100:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4101:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4102:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4103:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4104:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4105:         float_raise( float_flag_invalid );
        !          4106:         return 0;
        !          4107:     }
        !          4108:     aSign = extractFloatx80Sign( a );
        !          4109:     bSign = extractFloatx80Sign( b );
        !          4110:     if ( aSign != bSign ) {   /* signs differ */
        !          4111:         return
        !          4112:                aSign   /* a carries the sign bit and b does not: true */
        !          4113:             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          4114:                  == 0 );   /* otherwise true only when both operands are zeros */
        !          4115:     }
        !          4116:     return
        !          4117:           aSign ? le128( b.high, b.low, a.high, a.low )   /* both sign bits set: operands are swapped for the 128-bit compare */
        !          4118:         : le128( a.high, a.low, b.high, b.low );   /* both sign bits clear: compare in the given order */
        !          4119:
        !          4120: }
        !          4121:
        !          4122: /*
        !          4123: -------------------------------------------------------------------------------
        !          4124: Returns 1 if the extended double-precision floating-point value `a' is
        !          4125: less than the corresponding value `b', and 0 otherwise.  The comparison
        !          4126: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          4127: Arithmetic.
        !          4128: -------------------------------------------------------------------------------
        !          4129: */
        !          4130: flag floatx80_lt( floatx80 a, floatx80 b )   /* a < b; any NaN operand raises invalid and gives 0 */
        !          4131: {
        !          4132:     flag aSign, bSign;
        !          4133:
        !          4134:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4135:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4136:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4137:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4138:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4139:         float_raise( float_flag_invalid );
        !          4140:         return 0;
        !          4141:     }
        !          4142:     aSign = extractFloatx80Sign( a );
        !          4143:     bSign = extractFloatx80Sign( b );
        !          4144:     if ( aSign != bSign ) {   /* signs differ */
        !          4145:         return
        !          4146:                aSign   /* a must carry the sign bit ... */
        !          4147:             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          4148:                  != 0 );   /* ... and the operands must not both be zeros */
        !          4149:     }
        !          4150:     return
        !          4151:           aSign ? lt128( b.high, b.low, a.high, a.low )   /* both sign bits set: operands are swapped for the 128-bit compare */
        !          4152:         : lt128( a.high, a.low, b.high, b.low );   /* both sign bits clear: compare in the given order */
        !          4153:
        !          4154: }
        !          4155:
        !          4156: /*
        !          4157: -------------------------------------------------------------------------------
        !          4158: Returns 1 if the extended double-precision floating-point value `a' is equal
        !          4159: to the corresponding value `b', and 0 otherwise.  The invalid exception is
        !          4160: raised if either operand is a NaN.  Otherwise, the comparison is performed
        !          4161: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          4162: -------------------------------------------------------------------------------
        !          4163: */
        !          4164: flag floatx80_eq_signaling( floatx80 a, floatx80 b )   /* equality test; any NaN operand raises invalid and gives 0 */
        !          4165: {
        !          4166:
        !          4167:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4168:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4169:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4170:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4171:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4172:         float_raise( float_flag_invalid );
        !          4173:         return 0;
        !          4174:     }
        !          4175:     return
        !          4176:            ( a.low == b.low )   /* low words must match ... */
        !          4177:         && (    ( a.high == b.high )   /* ... and so must the high words, */
        !          4178:              || (    ( a.low == 0 )   /* unless both operands are zeros: low word 0 and */
        !          4179:                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )   /* both high words 0 once the top (sign) bit is shifted out */
        !          4180:            );
        !          4181:
        !          4182: }
        !          4183:
        !          4184: /*
        !          4185: -------------------------------------------------------------------------------
        !          4186: Returns 1 if the extended double-precision floating-point value `a' is less
        !          4187: than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
        !          4188: do not cause an exception.  Otherwise, the comparison is performed according
        !          4189: to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          4190: -------------------------------------------------------------------------------
        !          4191: */
        !          4192: flag floatx80_le_quiet( floatx80 a, floatx80 b )   /* a <= b; a NaN operand gives 0 and only a signaling NaN raises invalid */
        !          4193: {
        !          4194:     flag aSign, bSign;
        !          4195:
        !          4196:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4197:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4198:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4199:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4200:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4201:         if (    floatx80_is_signaling_nan( a )
        !          4202:              || floatx80_is_signaling_nan( b ) ) {
        !          4203:             float_raise( float_flag_invalid );
        !          4204:         }
        !          4205:         return 0;
        !          4206:     }
        !          4207:     aSign = extractFloatx80Sign( a );
        !          4208:     bSign = extractFloatx80Sign( b );
        !          4209:     if ( aSign != bSign ) {   /* signs differ */
        !          4210:         return
        !          4211:                aSign   /* a carries the sign bit and b does not: true */
        !          4212:             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          4213:                  == 0 );   /* otherwise true only when both operands are zeros */
        !          4214:     }
        !          4215:     return
        !          4216:           aSign ? le128( b.high, b.low, a.high, a.low )   /* both sign bits set: operands are swapped for the 128-bit compare */
        !          4217:         : le128( a.high, a.low, b.high, b.low );   /* both sign bits clear: compare in the given order */
        !          4218:
        !          4219: }
        !          4220:
        !          4221: /*
        !          4222: -------------------------------------------------------------------------------
        !          4223: Returns 1 if the extended double-precision floating-point value `a' is less
        !          4224: than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
        !          4225: an exception.  Otherwise, the comparison is performed according to the
        !          4226: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          4227: -------------------------------------------------------------------------------
        !          4228: */
        !          4229: flag floatx80_lt_quiet( floatx80 a, floatx80 b )   /* a < b; a NaN operand gives 0 and only a signaling NaN raises invalid */
        !          4230: {
        !          4231:     flag aSign, bSign;
        !          4232:
        !          4233:     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
        !          4234:               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
        !          4235:          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
        !          4236:               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        !          4237:        ) {   /* either operand is a NaN: maximum exponent with a bit set below bit 63 */
        !          4238:         if (    floatx80_is_signaling_nan( a )
        !          4239:              || floatx80_is_signaling_nan( b ) ) {
        !          4240:             float_raise( float_flag_invalid );
        !          4241:         }
        !          4242:         return 0;
        !          4243:     }
        !          4244:     aSign = extractFloatx80Sign( a );
        !          4245:     bSign = extractFloatx80Sign( b );
        !          4246:     if ( aSign != bSign ) {   /* signs differ */
        !          4247:         return
        !          4248:                aSign   /* a must carry the sign bit ... */
        !          4249:             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          4250:                  != 0 );   /* ... and the operands must not both be zeros */
        !          4251:     }
        !          4252:     return
        !          4253:           aSign ? lt128( b.high, b.low, a.high, a.low )   /* both sign bits set: operands are swapped for the 128-bit compare */
        !          4254:         : lt128( a.high, a.low, b.high, b.low );   /* both sign bits clear: compare in the given order */
        !          4255:
        !          4256: }
        !          4257:
        !          4258: #endif
        !          4259:
        !          4260: #ifdef FLOAT128
        !          4261:
        !          4262: /*
        !          4263: -------------------------------------------------------------------------------
        !          4264: Returns the result of converting the quadruple-precision floating-point
        !          4265: value `a' to the 32-bit two's complement integer format.  The conversion
        !          4266: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          4267: Arithmetic---which means in particular that the conversion is rounded
        !          4268: according to the current rounding mode.  If `a' is a NaN, the largest
        !          4269: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          4270: largest integer with the same sign as `a' is returned.
        !          4271: -------------------------------------------------------------------------------
        !          4272: */
        !          4273: int32 float128_to_int32( float128 a )   /* convert a to int32; the final rounding and packing is delegated to roundAndPackInt32 */
        !          4274: {
        !          4275:     flag aSign;
        !          4276:     int32 aExp, shiftCount;
        !          4277:     bits64 aSig0, aSig1;
        !          4278:
        !          4279:     aSig1 = extractFloat128Frac1( a );
        !          4280:     aSig0 = extractFloat128Frac0( a );
        !          4281:     aExp = extractFloat128Exp( a );
        !          4282:     aSign = extractFloat128Sign( a );
        !          4283:     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;   /* a NaN is converted as if its sign were clear */
        !          4284:     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );   /* nonzero exponent field: set bit 48; NOTE(review): presumably the implicit leading one -- confirm */
        !          4285:     aSig0 |= ( aSig1 != 0 );   /* fold "low fraction word is nonzero" into the lowest bit */
        !          4286:     shiftCount = 0x4028 - aExp;
        !          4287:     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );   /* only a positive count shifts; NOTE(review): "jamming" presumably keeps shifted-out bits sticky -- confirm */
        !          4288:     return roundAndPackInt32( aSign, aSig0 );
        !          4289:
        !          4290: }
        !          4291:
        !          4292: /*
        !          4293: -------------------------------------------------------------------------------
        !          4294: Returns the result of converting the quadruple-precision floating-point
        !          4295: value `a' to the 32-bit two's complement integer format.  The conversion
        !          4296: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          4297: Arithmetic, except that the conversion is always rounded toward zero.  If
        !          4298: `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
        !          4299: conversion overflows, the largest integer with the same sign as `a' is
        !          4300: returned.
        !          4301: -------------------------------------------------------------------------------
        !          4302: */
        !          4303: int32 float128_to_int32_round_to_zero( float128 a )   /* convert a to int32 by discarding the bits shifted out of the significand */
        !          4304: {
        !          4305:     flag aSign;
        !          4306:     int32 aExp, shiftCount;
        !          4307:     bits64 aSig0, aSig1, savedASig;
        !          4308:     int32 z;
        !          4309:
        !          4310:     aSig1 = extractFloat128Frac1( a );
        !          4311:     aSig0 = extractFloat128Frac0( a );
        !          4312:     aExp = extractFloat128Exp( a );
        !          4313:     aSign = extractFloat128Sign( a );
        !          4314:     aSig0 |= ( aSig1 != 0 );   /* fold "low fraction word is nonzero" into the lowest bit */
        !          4315:     if ( 0x401E < aExp ) {   /* exponent above 0x401E (this includes infinity and NaN): treated as invalid */
        !          4316:         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;   /* a NaN is reported as if its sign were clear */
        !          4317:         goto invalid;
        !          4318:     }
        !          4319:     else if ( aExp < 0x3FFF ) {   /* exponent below 0x3FFF: the result is 0 */
        !          4320:         if ( aExp || aSig0 ) float_set_inexact();   /* flagged inexact unless a is exactly zero */
        !          4321:         return 0;
        !          4322:     }
        !          4323:     aSig0 |= LIT64( 0x0001000000000000 );   /* set bit 48; NOTE(review): presumably the implicit leading one -- confirm */
        !          4324:     shiftCount = 0x402F - aExp;
        !          4325:     savedASig = aSig0;   /* kept so discarded bits can be detected below */
        !          4326:     aSig0 >>= shiftCount;
        !          4327:     z = aSig0;
        !          4328:     if ( aSign ) z = - z;
        !          4329:     if ( ( z < 0 ) ^ aSign ) {   /* sign of z disagrees with a's sign bit: treated as invalid */
        !          4330:  invalid:
        !          4331:         float_raise( float_flag_invalid );
        !          4332:         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;   /* 0x80000000 when the sign is set, 0x7FFFFFFF otherwise */
        !          4333:     }
        !          4334:     if ( ( aSig0<<shiftCount ) != savedASig ) {   /* shifting back does not restore the value: bits were discarded */
        !          4335:         float_set_inexact();
        !          4336:     }
        !          4337:     return z;
        !          4338:
        !          4339: }
        !          4340:
        !          4341: /*
        !          4342: -------------------------------------------------------------------------------
        !          4343: Converts the quadruple-precision floating-point value `a' to the 64-bit
        !          4344: two's complement integer format.  As the IEC/IEEE Standard for Binary
        !          4345: Floating-Point Arithmetic requires, the result is rounded according to the
        !          4346: current rounding mode.  A NaN converts to the largest positive integer.
        !          4347: Any other value whose conversion overflows converts to the largest integer
        !          4348: with the same sign as `a'.  The out-of-range path handled in this routine
        !          4349: raises the invalid exception before returning.
        !          4350: -------------------------------------------------------------------------------
        !          4351: */
        !          4352: int64 float128_to_int64( float128 a )
        !          4353: {
        !          4354:     flag negative;
        !          4355:     int32 exponent, rightShift;
        !          4356:     bits64 sigHi, sigLo;
        !          4357:
        !          4358:     sigLo = extractFloat128Frac1( a );
        !          4359:     sigHi = extractFloat128Frac0( a );
        !          4360:     exponent = extractFloat128Exp( a );
        !          4361:     negative = extractFloat128Sign( a );
        !          4362:     if ( exponent != 0 ) sigHi |= LIT64( 0x0001000000000000 );
        !          4363:     rightShift = 0x402F - exponent;
        !          4364:     if ( 0 < rightShift ) {
        !          4365:         shift64ExtraRightJamming( sigHi, sigLo, rightShift, &sigHi, &sigLo );
        !          4366:         return roundAndPackInt64( negative, sigHi, sigLo );
        !          4367:     }
        !          4368:     if ( exponent <= 0x403E ) {
        !          4369:         shortShift128Left( sigHi, sigLo, - rightShift, &sigHi, &sigLo );
        !          4370:         return roundAndPackInt64( negative, sigHi, sigLo );
        !          4371:     }
        !          4372:     /* Exponent above 0x403E: out of range, or the 0x7FFF special encoding. */
        !          4373:     float_raise( float_flag_invalid );
        !          4374:     if ( negative ) {
        !          4375:         if ( exponent != 0x7FFF ) return (sbits64) LIT64( 0x8000000000000000 );
        !          4376:         /* Only an exact 0x7FFF/zero-fraction value keeps the negative limit. */
        !          4377:         if ( ( sigLo == 0 ) && ( sigHi == LIT64( 0x0001000000000000 ) ) ) {
        !          4378:             return (sbits64) LIT64( 0x8000000000000000 );
        !          4379:         }
        !          4380:     }
        !          4381:     return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          4382:
        !          4383: }
        !          4384:
        !          4385: /*
        !          4386: -------------------------------------------------------------------------------
        !          4387: Returns the result of converting the quadruple-precision floating-point
        !          4388: value `a' to the 64-bit two's complement integer format, following the
        !          4389: IEC/IEEE Standard for Binary Floating-Point Arithmetic except that the
        !          4390: conversion is always rounded toward zero.  If `a' is a NaN, the largest
        !          4391: positive integer is returned.  Otherwise, if the conversion overflows, the
        !          4392: largest integer with the same sign as `a' is returned.  Nonzero discarded
        !          4393: bits call float_set_inexact(); the out-of-range path raises invalid.
        !          4394: -------------------------------------------------------------------------------
        !          4395: */
        !          4396: int64 float128_to_int64_round_to_zero( float128 a )
        !          4397: {
        !          4398:     flag aSign;
        !          4399:     int32 aExp, shiftCount;
        !          4400:     bits64 aSig0, aSig1;
        !          4401:     int64 z;
        !          4402:
        !          4403:     aSig1 = extractFloat128Frac1( a );
        !          4404:     aSig0 = extractFloat128Frac0( a );
        !          4405:     aExp = extractFloat128Exp( a );
        !          4406:     aSign = extractFloat128Sign( a );
        !          4407:     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); /* nonzero exponent: set bit 48 */
        !          4408:     shiftCount = aExp - 0x402F; /* positive: shift left; otherwise shift right */
        !          4409:     if ( 0 < shiftCount ) {
        !          4410:         if ( 0x403E <= aExp ) { /* too large, except for the one case tested next */
        !          4411:             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); /* drop bit 48 so the NaN test below sees only the low 48 bits */
        !          4412:             if (    ( a.high == LIT64( 0xC03E000000000000 ) ) /* NOTE(review): presumably sign set, exponent 0x403E, zero high fraction */
        !          4413:                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { /* low word below 2^49 */
        !          4414:                 if ( aSig1 ) float_set_inexact(); /* nonzero low bits are discarded; the minimum integer is returned below */
        !          4415:             }
        !          4416:             else {
        !          4417:                 float_raise( float_flag_invalid );
        !          4418:                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { /* positive, or exponent 0x7FFF with nonzero fraction */
        !          4419:                     return LIT64( 0x7FFFFFFFFFFFFFFF );
        !          4420:                 }
        !          4421:             }
        !          4422:             return (sbits64) LIT64( 0x8000000000000000 ); /* shared by the exact case and the negative out-of-range case */
        !          4423:         }
        !          4424:         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); /* 1 <= shiftCount <= 14 here, so the right shift is 64 - shiftCount */
        !          4425:         if ( (bits64) ( aSig1<<shiftCount ) ) { /* bits of aSig1 that did not reach z */
        !          4426:             float_set_inexact();
        !          4427:         }
        !          4428:     }
        !          4429:     else {
        !          4430:         if ( aExp < 0x3FFF ) { /* result is 0 for every such input */
        !          4431:             if ( aExp | aSig0 | aSig1 ) { /* anything but an all-zero exponent and fraction */
        !          4432:                 float_set_inexact();
        !          4433:             }
        !          4434:             return 0;
        !          4435:         }
        !          4436:         z = aSig0>>( - shiftCount ); /* shiftCount <= 0 here; aSig1 contributes nothing to z */
        !          4437:         if (    aSig1 /* any low-word bit is discarded */
        !          4438:              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { /* or any bit shifted out of aSig0 */
        !          4439:             float_set_inexact();
        !          4440:         }
        !          4441:     }
        !          4442:     if ( aSign ) z = - z; /* magnitude was built unsigned; apply the sign last */
        !          4443:     return z;
        !          4444:
        !          4445: }
        !          4446:
        !          4447: /*
        !          4448: -------------------------------------------------------------------------------
        !          4449: Converts the quadruple-precision floating-point value `a' to the
        !          4450: single-precision floating-point format and returns the single-precision
        !          4451: result.  The conversion follows the IEC/IEEE Standard for Binary
        !          4452: Floating-Point Arithmetic.
        !          4453: -------------------------------------------------------------------------------
        !          4454: */
        !          4455: float32 float128_to_float32( float128 a )
        !          4456: {
        !          4457:     flag negative;
        !          4458:     int32 exponent;
        !          4459:     bits64 fracHi, fracLo;
        !          4460:     bits32 sig32;
        !          4461:
        !          4462:     fracLo = extractFloat128Frac1( a );
        !          4463:     fracHi = extractFloat128Frac0( a );
        !          4464:     exponent = extractFloat128Exp( a );
        !          4465:     negative = extractFloat128Sign( a );
        !          4466:     /* Exponent 0x7FFF: zero fraction packs 0xFF/0, otherwise a NaN. */
        !          4467:     if ( exponent == 0x7FFF ) {
        !          4468:         if ( ( fracHi | fracLo ) == 0 ) return packFloat32( negative, 0xFF, 0 );
        !          4469:         return commonNaNToFloat32( float128ToCommonNaN( a ) );
        !          4470:     }
        !          4471:     /* Fold the whole low word into bit 0 before it is dropped. */
        !          4472:     if ( fracLo != 0 ) fracHi |= 1;
        !          4473:     shift64RightJamming( fracHi, 18, &fracHi );
        !          4474:     sig32 = fracHi;
        !          4475:     if ( ( exponent != 0 ) || ( sig32 != 0 ) ) {
        !          4476:         exponent -= 0x3F81;
        !          4477:         sig32 |= 0x40000000;
        !          4478:     }
        !          4479:     return roundAndPackFloat32( negative, exponent, sig32 );
        !          4480:
        !          4481: }
        !          4482:
        !          4483: /*
        !          4484: -------------------------------------------------------------------------------
        !          4485: Converts the quadruple-precision floating-point value `a' to the
        !          4486: double-precision floating-point format and returns the double-precision
        !          4487: result.  The conversion follows the IEC/IEEE Standard for Binary
        !          4488: Floating-Point Arithmetic.
        !          4489: -------------------------------------------------------------------------------
        !          4490: */
        !          4491: float64 float128_to_float64( float128 a )
        !          4492: {
        !          4493:     flag negative;
        !          4494:     int32 exponent;
        !          4495:     bits64 fracHi, fracLo;
        !          4496:
        !          4497:     fracLo = extractFloat128Frac1( a );
        !          4498:     fracHi = extractFloat128Frac0( a );
        !          4499:     exponent = extractFloat128Exp( a );
        !          4500:     negative = extractFloat128Sign( a );
        !          4501:     /* Exponent 0x7FFF: zero fraction packs 0x7FF/0, otherwise a NaN. */
        !          4502:     if ( exponent == 0x7FFF ) {
        !          4503:         if ( ( fracHi | fracLo ) == 0 ) return packFloat64( negative, 0x7FF, 0 );
        !          4504:         return commonNaNToFloat64( float128ToCommonNaN( a ) );
        !          4505:     }
        !          4506:     shortShift128Left( fracHi, fracLo, 14, &fracHi, &fracLo );
        !          4507:     /* Whatever is left in the low word is folded into bit 0. */
        !          4508:     if ( fracLo != 0 ) fracHi |= 1;
        !          4509:     if ( ( exponent != 0 ) || ( fracHi != 0 ) ) {
        !          4510:         exponent -= 0x3C01;
        !          4511:         fracHi |= LIT64( 0x4000000000000000 );
        !          4512:     }
        !          4513:     return roundAndPackFloat64( negative, exponent, fracHi );
        !          4514:
        !          4515: }
        !          4516:
        !          4517: #ifdef FLOATX80
        !          4518:
        !          4519: /*
        !          4520: -------------------------------------------------------------------------------
        !          4521: Converts the quadruple-precision floating-point value `a' to the extended
        !          4522: double-precision floating-point format and returns the extended
        !          4523: double-precision result.  The conversion follows the IEC/IEEE Standard
        !          4524: for Binary Floating-Point Arithmetic.
        !          4525: -------------------------------------------------------------------------------
        !          4526: */
        !          4527: floatx80 float128_to_floatx80( float128 a )
        !          4528: {
        !          4529:     flag negative;
        !          4530:     int32 exponent;
        !          4531:     bits64 sigHi, sigLo;
        !          4532:
        !          4533:     sigLo = extractFloat128Frac1( a );
        !          4534:     sigHi = extractFloat128Frac0( a );
        !          4535:     exponent = extractFloat128Exp( a );
        !          4536:     negative = extractFloat128Sign( a );
        !          4537:     if ( exponent == 0x7FFF ) {
        !          4538:         if ( ( sigHi | sigLo ) == 0 ) {
        !          4539:             return packFloatx80( negative, 0x7FFF, LIT64( 0x8000000000000000 ) );
        !          4540:         }
        !          4541:         return commonNaNToFloatx80( float128ToCommonNaN( a ) );
        !          4542:     }
        !          4543:     if ( exponent != 0 ) {
        !          4544:         sigHi |= LIT64( 0x0001000000000000 );
        !          4545:     }
        !          4546:     else {
        !          4547:         if ( ( sigHi | sigLo ) == 0 ) return packFloatx80( negative, 0, 0 );
        !          4548:         normalizeFloat128Subnormal( sigHi, sigLo, &exponent, &sigHi, &sigLo );
        !          4549:     }
        !          4550:     shortShift128Left( sigHi, sigLo, 15, &sigHi, &sigLo );
        !          4551:     return roundAndPackFloatx80( 80, negative, exponent, sigHi, sigLo );
        !          4552:
        !          4553: }
        !          4554:
        !          4555: #endif
        !          4556:
        !          4557: /*
        !          4558: -------------------------------------------------------------------------------
        !          4559: Rounds the quadruple-precision floating-point value `a' to an integer, and
        !          4560: returns the result as a quadruple-precision floating-point value.  The
        !          4561: rounding mode is read from float_rounding_mode(), per the IEC/IEEE Standard
        !          4562: for Binary Floating-Point Arithmetic; a changed value sets inexact.
        !          4563: -------------------------------------------------------------------------------
        !          4564: */
        !          4565: float128 float128_round_to_int( float128 a )
        !          4566: {
        !          4567:     flag aSign;
        !          4568:     int32 aExp;
        !          4569:     bits64 lastBitMask, roundBitsMask;
        !          4570:     int8 roundingMode;
        !          4571:     float128 z;
        !          4572:
        !          4573:     aExp = extractFloat128Exp( a );
        !          4574:     if ( 0x402F <= aExp ) { /* in this branch only bits of a.low can be rounded off */
        !          4575:         if ( 0x406F <= aExp ) { /* nothing to round: returned as is unless it is a NaN */
        !          4576:             if (    ( aExp == 0x7FFF )
        !          4577:                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
        !          4578:                ) {
        !          4579:                 return propagateFloat128NaN( a, a );
        !          4580:             }
        !          4581:             return a;
        !          4582:         }
        !          4583:         lastBitMask = 1;
        !          4584:         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; /* done in two steps; becomes 0 when aExp == 0x402F (total shift of 64) */
        !          4585:         roundBitsMask = lastBitMask - 1; /* every bit below lastBitMask; all ones when lastBitMask is 0 */
        !          4586:         z = a;
        !          4587:         roundingMode = float_rounding_mode();
        !          4588:         if ( roundingMode == float_round_nearest_even ) {
        !          4589:             if ( lastBitMask ) { /* last kept bit lies inside z.low */
        !          4590:                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); /* add half of the last kept bit */
        !          4591:                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; /* input was an exact tie: clear the last kept bit */
        !          4592:             }
        !          4593:             else { /* last kept bit is bit 0 of z.high */
        !          4594:                 if ( (sbits64) z.low < 0 ) { /* top bit of z.low set: at least half */
        !          4595:                     ++z.high;
        !          4596:                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; /* exactly half: clear bit 0 of z.high */
        !          4597:                 }
        !          4598:             }
        !          4599:         }
        !          4600:         else if ( roundingMode != float_round_to_zero ) { /* round_to_zero needs only the final mask */
        !          4601:             if (   extractFloat128Sign( z )
        !          4602:                  ^ ( roundingMode == float_round_up ) ) { /* sign set without round_up, or sign clear with round_up */
        !          4603:                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); /* carries into the kept bits iff a rounded-off bit is set */
        !          4604:             }
        !          4605:         }
        !          4606:         z.low &= ~ roundBitsMask; /* discard the rounded-off bits */
        !          4607:     }
        !          4608:     else {
        !          4609:         if ( aExp < 0x3FFF ) { /* every return below has exponent 0 or 0x3FFF and a zero fraction, or is `a' itself */
        !          4610:             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; /* all bits except bit 63 of a.high are zero */
        !          4611:             float_set_inexact();
        !          4612:             aSign = extractFloat128Sign( a );
        !          4613:             switch ( float_rounding_mode() ) {
        !          4614:              case float_round_nearest_even:
        !          4615:                 if (    ( aExp == 0x3FFE ) /* exponent 0x3FFE with a nonzero fraction */
        !          4616:                      && (   extractFloat128Frac0( a )
        !          4617:                           | extractFloat128Frac1( a ) )
        !          4618:                    ) {
        !          4619:                     return packFloat128( aSign, 0x3FFF, 0, 0 );
        !          4620:                 }
        !          4621:                 break;
        !          4622:              case float_round_down:
        !          4623:                 return
        !          4624:                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
        !          4625:                     : packFloat128( 0, 0, 0, 0 );
        !          4626:              case float_round_up:
        !          4627:                 return
        !          4628:                       aSign ? packFloat128( 1, 0, 0, 0 )
        !          4629:                     : packFloat128( 0, 0x3FFF, 0, 0 );
        !          4630:             }
        !          4631:             return packFloat128( aSign, 0, 0, 0 ); /* nearest_even fall-through and any mode without a case above */
        !          4632:         }
        !          4633:         lastBitMask = 1;
        !          4634:         lastBitMask <<= 0x402F - aExp; /* 0x3FFF <= aExp < 0x402F here, so the shift is 1..48 */
        !          4635:         roundBitsMask = lastBitMask - 1;
        !          4636:         z.low = 0; /* every bit of a.low is rounded off in this branch */
        !          4637:         z.high = a.high;
        !          4638:         roundingMode = float_rounding_mode();
        !          4639:         if ( roundingMode == float_round_nearest_even ) {
        !          4640:             z.high += lastBitMask>>1; /* add half of the last kept bit */
        !          4641:             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { /* exact tie, which also requires a.low == 0 */
        !          4642:                 z.high &= ~ lastBitMask;
        !          4643:             }
        !          4644:         }
        !          4645:         else if ( roundingMode != float_round_to_zero ) {
        !          4646:             if (   extractFloat128Sign( z )
        !          4647:                  ^ ( roundingMode == float_round_up ) ) { /* same direction test as in the branch above */
        !          4648:                 z.high |= ( a.low != 0 ); /* let a nonzero a.low count as a rounded-off bit */
        !          4649:                 z.high += roundBitsMask;
        !          4650:             }
        !          4651:         }
        !          4652:         z.high &= ~ roundBitsMask; /* discard the rounded-off bits */
        !          4653:     }
        !          4654:     if ( ( z.low != a.low ) || ( z.high != a.high ) ) { /* result differs from the input */
        !          4655:         float_set_inexact();
        !          4656:     }
        !          4657:     return z;
        !          4658:
        !          4659: }
        !          4660:
        !          4661: /*
        !          4662: -------------------------------------------------------------------------------
        !          4663: Adds the absolute values of the quadruple-precision floating-point values
        !          4664: `a' and `b' and returns the sum.  If `zSign' is 1, the sum is negated
        !          4665: before being returned.  `zSign' is ignored if the result is a NaN.
        !          4666: The addition is performed according to the IEC/IEEE Standard for Binary
        !          4667: Floating-Point Arithmetic.
        !          4668: -------------------------------------------------------------------------------
        !          4669: */
        !          4670: static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
        !          4671: {
        !          4672:     int32 aExp, bExp, zExp;
        !          4673:     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
        !          4674:     int32 expDiff;
        !          4675:
        !          4676:     aSig1 = extractFloat128Frac1( a );
        !          4677:     aSig0 = extractFloat128Frac0( a );
        !          4678:     aExp = extractFloat128Exp( a );
        !          4679:     bSig1 = extractFloat128Frac1( b );
        !          4680:     bSig0 = extractFloat128Frac0( b );
        !          4681:     bExp = extractFloat128Exp( b );
        !          4682:     expDiff = aExp - bExp;
        !          4683:     if ( 0 < expDiff ) { /* a has the larger exponent: b is shifted right to match */
        !          4684:         if ( aExp == 0x7FFF ) { /* nonzero fraction: propagate NaN; otherwise return a unchanged */
        !          4685:             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
        !          4686:             return a;
        !          4687:         }
        !          4688:         if ( bExp == 0 ) { /* b has exponent 0: shift by one less and add no bit 48 */
        !          4689:             --expDiff;
        !          4690:         }
        !          4691:         else {
        !          4692:             bSig0 |= LIT64( 0x0001000000000000 );
        !          4693:         }
        !          4694:         shift128ExtraRightJamming(
        !          4695:             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); /* third output word lands in zSig2 */
        !          4696:         zExp = aExp;
        !          4697:     }
        !          4698:     else if ( expDiff < 0 ) { /* mirror image: b has the larger exponent */
        !          4699:         if ( bExp == 0x7FFF ) {
        !          4700:             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        !          4701:             return packFloat128( zSign, 0x7FFF, 0, 0 ); /* built from zSign rather than returning b */
        !          4702:         }
        !          4703:         if ( aExp == 0 ) {
        !          4704:             ++expDiff;
        !          4705:         }
        !          4706:         else {
        !          4707:             aSig0 |= LIT64( 0x0001000000000000 );
        !          4708:         }
        !          4709:         shift128ExtraRightJamming(
        !          4710:             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
        !          4711:         zExp = bExp;
        !          4712:     }
        !          4713:     else { /* equal exponents */
        !          4714:         if ( aExp == 0x7FFF ) {
        !          4715:             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
        !          4716:                 return propagateFloat128NaN( a, b );
        !          4717:             }
        !          4718:             return a;
        !          4719:         }
        !          4720:         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        !          4721:         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); /* both exponents 0: the raw sum is packed with exponent 0 */
        !          4722:         zSig2 = 0;
        !          4723:         zSig0 |= LIT64( 0x0002000000000000 ); /* NOTE(review): bit 49, presumably the sum of the two implicit bit-48 ones; confirm */
        !          4724:         zExp = aExp;
        !          4725:         goto shiftRight1;
        !          4726:     }
        !          4727:     aSig0 |= LIT64( 0x0001000000000000 ); /* reached only from the two unequal-exponent branches */
        !          4728:     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        !          4729:     --zExp;
        !          4730:     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; /* sum stayed below bit 49: keep the decremented exponent */
        !          4731:     ++zExp;
        !          4732:  shiftRight1:
        !          4733:     shift128ExtraRightJamming(
        !          4734:         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
        !          4735:  roundAndPack:
        !          4736:     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
        !          4737:
        !          4738: }
        !          4739:
        !          4740: /*
        !          4741: -------------------------------------------------------------------------------
        !          4742: Subtracts the absolute value of the quadruple-precision floating-point
        !          4743: value `b' from that of `a' and returns the difference.  If `zSign' is 1,
        !          4744: the difference is negated before being returned.  `zSign' is ignored if the
        !          4745: result is a NaN.  The subtraction is performed according to the IEC/IEEE
        !          4746: Standard for Binary Floating-Point Arithmetic.
        !          4747: -------------------------------------------------------------------------------
        !          4748: */
        !          4749: static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
        !          4750: {
        !          4751:     int32 aExp, bExp, zExp;
        !          4752:     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
        !          4753:     int32 expDiff;
        !          4754:     float128 z;
        !          4755:
        !          4756:     aSig1 = extractFloat128Frac1( a );
        !          4757:     aSig0 = extractFloat128Frac0( a );
        !          4758:     aExp = extractFloat128Exp( a );
        !          4759:     bSig1 = extractFloat128Frac1( b );
        !          4760:     bSig0 = extractFloat128Frac0( b );
        !          4761:     bExp = extractFloat128Exp( b );
        !          4762:     expDiff = aExp - bExp;
        !          4763:     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); /* both fractions move up 14 bits; compensated by zExp - 14 at the end */
        !          4764:     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
        !          4765:     if ( 0 < expDiff ) goto aExpBigger;
        !          4766:     if ( expDiff < 0 ) goto bExpBigger;
        !          4767:     if ( aExp == 0x7FFF ) { /* exponents are equal here, so both are 0x7FFF */
        !          4768:         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
        !          4769:             return propagateFloat128NaN( a, b );
        !          4770:         }
        !          4771:         float_raise( float_flag_invalid ); /* neither fraction is nonzero: raise invalid, return the default NaN */
        !          4772:         z.low = float128_default_nan_low;
        !          4773:         z.high = float128_default_nan_high;
        !          4774:         return z;
        !          4775:     }
        !          4776:     if ( aExp == 0 ) { /* both exponents 0: continue as if both were 1 */
        !          4777:         aExp = 1;
        !          4778:         bExp = 1;
        !          4779:     }
        !          4780:     if ( bSig0 < aSig0 ) goto aBigger; /* compare high words first, then low words */
        !          4781:     if ( aSig0 < bSig0 ) goto bBigger;
        !          4782:     if ( bSig1 < aSig1 ) goto aBigger;
        !          4783:     if ( aSig1 < bSig1 ) goto bBigger;
        !          4784:     return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 ); /* equal magnitudes: zero, sign bit set only in round_down mode */
        !          4785:  bExpBigger:
        !          4786:     if ( bExp == 0x7FFF ) {
        !          4787:         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        !          4788:         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); /* sign inverted because b is the larger operand */
        !          4789:     }
        !          4790:     if ( aExp == 0 ) { /* a has exponent 0: shift by one less and add no leading bit */
        !          4791:         ++expDiff;
        !          4792:     }
        !          4793:     else {
        !          4794:         aSig0 |= LIT64( 0x4000000000000000 ); /* bit 62, i.e. bit 48 after the 14-bit left shift */
        !          4795:     }
        !          4796:     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        !          4797:     bSig0 |= LIT64( 0x4000000000000000 );
        !          4798:  bBigger:
        !          4799:     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); /* b - a */
        !          4800:     zExp = bExp;
        !          4801:     zSign ^= 1; /* b was larger: flip the result sign */
        !          4802:     goto normalizeRoundAndPack;
        !          4803:  aExpBigger:
        !          4804:     if ( aExp == 0x7FFF ) {
        !          4805:         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
        !          4806:         return a;
        !          4807:     }
        !          4808:     if ( bExp == 0 ) {
        !          4809:         --expDiff;
        !          4810:     }
        !          4811:     else {
        !          4812:         bSig0 |= LIT64( 0x4000000000000000 );
        !          4813:     }
        !          4814:     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
        !          4815:     aSig0 |= LIT64( 0x4000000000000000 );
        !          4816:  aBigger:
        !          4817:     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); /* a - b */
        !          4818:     zExp = aExp;
        !          4819:  normalizeRoundAndPack:
        !          4820:     --zExp;
        !          4821:     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); /* - 14 undoes the initial left shift */
        !          4822:
        !          4823: }
        !          4824:
        !          4825: /*
        !          4826: -------------------------------------------------------------------------------
        !          4827: Adds the quadruple-precision floating-point values `a' and `b' and returns
        !          4828: the sum.  The operation is performed according to the IEC/IEEE Standard
        !          4829: for Binary Floating-Point Arithmetic.
        !          4830: -------------------------------------------------------------------------------
        !          4831: */
        !          4832: float128 float128_add( float128 a, float128 b )
        !          4833: {
        !          4834:     flag signOfA, signOfB;
        !          4835:
        !          4836:     signOfA = extractFloat128Sign( a );
        !          4837:     signOfB = extractFloat128Sign( b );
        !          4838:     if ( signOfA != signOfB ) {
        !          4839:         /* Opposite signs: the sum is a difference of magnitudes. */
        !          4840:         return subFloat128Sigs( a, b, signOfA );
        !          4841:     }
        !          4842:     /* Matching signs: add the magnitudes under the sign of `a'. */
        !          4843:     return addFloat128Sigs( a, b, signOfA );
        !          4844:
        !          4845: }
        !          4846:
        !          4847: /*
        !          4848: -------------------------------------------------------------------------------
        !          4849: Subtracts the quadruple-precision floating-point value `b' from `a' and
        !          4850: returns the difference.  The operation is performed according to the
        !          4851: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          4852: -------------------------------------------------------------------------------
        !          4853: */
        !          4854: float128 float128_sub( float128 a, float128 b )
        !          4855: {
        !          4856:     flag signOfA, signOfB;
        !          4857:
        !          4858:     signOfA = extractFloat128Sign( a );
        !          4859:     signOfB = extractFloat128Sign( b );
        !          4860:     if ( signOfA != signOfB ) {
        !          4861:         /* Opposite signs: the difference is a sum of magnitudes. */
        !          4862:         return addFloat128Sigs( a, b, signOfA );
        !          4863:     }
        !          4864:     /* Matching signs: subtract the magnitudes under the sign of `a'. */
        !          4865:     return subFloat128Sigs( a, b, signOfA );
        !          4866:
        !          4867: }
        !          4868:
        !          4869: /*
        !          4870: -------------------------------------------------------------------------------
        !          4871: Returns the result of multiplying the quadruple-precision floating-point
        !          4872: values `a' and `b'.  The operation is performed according to the IEC/IEEE
        !          4873: Standard for Binary Floating-Point Arithmetic.
        !          4874: -------------------------------------------------------------------------------
        !          4875: */
        !          4876: float128 float128_mul( float128 a, float128 b )
        !          4877: {
                             /*
                              * Outline: unpack both operands, dispose of the special cases
                              * (exponent 0x7FFF, zero, subnormal), form the 256-bit product of
                              * the two significands, then pass the upper 128 bits plus one
                              * extra word (with the lowest word folded in as a sticky bit) to
                              * roundAndPackFloat128().  The result sign is the XOR of the
                              * operand signs.
                              */
        !          4878:     flag aSign, bSign, zSign;
        !          4879:     int32 aExp, bExp, zExp;
        !          4880:     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
        !          4881:     float128 z;
        !          4882:
        !          4883:     aSig1 = extractFloat128Frac1( a );
        !          4884:     aSig0 = extractFloat128Frac0( a );
        !          4885:     aExp = extractFloat128Exp( a );
        !          4886:     aSign = extractFloat128Sign( a );
        !          4887:     bSig1 = extractFloat128Frac1( b );
        !          4888:     bSig0 = extractFloat128Frac0( b );
        !          4889:     bExp = extractFloat128Exp( b );
        !          4890:     bSign = extractFloat128Sign( b );
        !          4891:     zSign = aSign ^ bSign;
                             /* a has the all-ones exponent: NaN (nonzero fraction) or infinity. */
        !          4892:     if ( aExp == 0x7FFF ) {
        !          4893:         if (    ( aSig0 | aSig1 )
        !          4894:              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
        !          4895:             return propagateFloat128NaN( a, b );
        !          4896:         }
                                 /* infinity * zero is invalid; the label lives in the b branch below. */
        !          4897:         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
        !          4898:         return packFloat128( zSign, 0x7FFF, 0, 0 );
        !          4899:     }
                             /* b has the all-ones exponent and a does not. */
        !          4900:     if ( bExp == 0x7FFF ) {
        !          4901:         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        !          4902:         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
                                     /* Shared exit: raise invalid and return the default NaN. */
        !          4903:  invalid:
        !          4904:             float_raise( float_flag_invalid );
        !          4905:             z.low = float128_default_nan_low;
        !          4906:             z.high = float128_default_nan_high;
        !          4907:             return z;
        !          4908:         }
        !          4909:         return packFloat128( zSign, 0x7FFF, 0, 0 );
        !          4910:     }
                             /*
                              * Zero exponent: an all-zero fraction is a zero operand, so the
                              * product is a zero carrying zSign; otherwise the operand is
                              * subnormal and is normalized (aExp/aSig are updated in place).
                              */
        !          4911:     if ( aExp == 0 ) {
        !          4912:         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        !          4913:         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
        !          4914:     }
        !          4915:     if ( bExp == 0 ) {
        !          4916:         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        !          4917:         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
        !          4918:     }
        !          4919:     zExp = aExp + bExp - 0x4000;
                             /*
                              * Only a gets bit 48 of its upper word set explicitly.  b's
                              * fraction is shifted left 16 without that bit, so the multiply
                              * below covers a * frac(b) only; the add128 that follows adds a
                              * itself into the upper 128 bits.
                              * NOTE(review): this reads as supplying the a * 1 term for b's
                              * implicit leading bit -- confirm against the SoftFloat docs.
                              */
        !          4920:     aSig0 |= LIT64( 0x0001000000000000 );
        !          4921:     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
        !          4922:     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
        !          4923:     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
                             /* Fold the lowest product word into zSig2 as a sticky bit. */
        !          4924:     zSig2 |= ( zSig3 != 0 );
                             /* Significand reached bit 49: shift right one place and bump the exponent. */
        !          4925:     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
        !          4926:         shift128ExtraRightJamming(
        !          4927:             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
        !          4928:         ++zExp;
        !          4929:     }
        !          4930:     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
        !          4931:
        !          4932: }
        !          4933:
        !          4934: /*
        !          4935: -------------------------------------------------------------------------------
        !          4936: Returns the result of dividing the quadruple-precision floating-point value
        !          4937: `a' by the corresponding value `b'.  The operation is performed according to
        !          4938: the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          4939: -------------------------------------------------------------------------------
        !          4940: */
        !          4941: float128 float128_div( float128 a, float128 b )
        !          4942: {
                             /*
                              * Outline: unpack both operands, handle NaN/infinity/zero/subnormal
                              * inputs, then build the quotient as two 64-bit words (zSig0,
                              * zSig1).  Each word is first estimated with estimateDiv128To64()
                              * and then corrected downward while the running remainder is
                              * negative.  The result sign is the XOR of the operand signs.
                              */
        !          4943:     flag aSign, bSign, zSign;
        !          4944:     int32 aExp, bExp, zExp;
        !          4945:     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
        !          4946:     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
        !          4947:     float128 z;
        !          4948:
        !          4949:     aSig1 = extractFloat128Frac1( a );
        !          4950:     aSig0 = extractFloat128Frac0( a );
        !          4951:     aExp = extractFloat128Exp( a );
        !          4952:     aSign = extractFloat128Sign( a );
        !          4953:     bSig1 = extractFloat128Frac1( b );
        !          4954:     bSig0 = extractFloat128Frac0( b );
        !          4955:     bExp = extractFloat128Exp( b );
        !          4956:     bSign = extractFloat128Sign( b );
        !          4957:     zSign = aSign ^ bSign;
                             /* a is NaN or infinity. */
        !          4958:     if ( aExp == 0x7FFF ) {
        !          4959:         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
        !          4960:         if ( bExp == 0x7FFF ) {
        !          4961:             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
                                     /* infinity / infinity: jump to the shared invalid exit below. */
        !          4962:             goto invalid;
        !          4963:         }
        !          4964:         return packFloat128( zSign, 0x7FFF, 0, 0 );
        !          4965:     }
                             /* b is NaN or infinity while a is finite; finite / infinity gives zero. */
        !          4966:     if ( bExp == 0x7FFF ) {
        !          4967:         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        !          4968:         return packFloat128( zSign, 0, 0, 0 );
        !          4969:     }
        !          4970:     if ( bExp == 0 ) {
        !          4971:         if ( ( bSig0 | bSig1 ) == 0 ) {
                                     /* Divisor is zero: 0 / 0 is invalid, anything else is divide-by-zero. */
        !          4972:             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
        !          4973:  invalid:
        !          4974:                 float_raise( float_flag_invalid );
        !          4975:                 z.low = float128_default_nan_low;
        !          4976:                 z.high = float128_default_nan_high;
        !          4977:                 return z;
        !          4978:             }
        !          4979:             float_raise( float_flag_divbyzero );
        !          4980:             return packFloat128( zSign, 0x7FFF, 0, 0 );
        !          4981:         }
                                 /* Subnormal divisor: normalize it, updating bExp/bSig in place. */
        !          4982:         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
        !          4983:     }
        !          4984:     if ( aExp == 0 ) {
        !          4985:         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        !          4986:         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
        !          4987:     }
        !          4988:     zExp = aExp - bExp + 0x3FFD;
                             /* Set bit 48 of each upper word, then shift both significands left by 15. */
        !          4989:     shortShift128Left(
        !          4990:         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
        !          4991:     shortShift128Left(
        !          4992:         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
                             /*
                              * If b <= a, halve a and compensate in the exponent, so that the
                              * dividend significand is strictly below the divisor significand
                              * before the quotient words are estimated.
                              */
        !          4993:     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
        !          4994:         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
        !          4995:         ++zExp;
        !          4996:     }
                             /* First quotient word: estimate, compute a - b*zSig0, correct downward. */
        !          4997:     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
        !          4998:     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
        !          4999:     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
        !          5000:     while ( (sbits64) rem0 < 0 ) {
        !          5001:         --zSig0;
        !          5002:         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
        !          5003:     }
                             /*
                              * Second quotient word.  The exact remainder is computed only when
                              * the low 14 bits of the estimate are <= 4.
                              * NOTE(review): presumably a larger value means the estimate's
                              * error cannot change the rounding decision -- confirm against
                              * the error bound of estimateDiv128To64().
                              */
        !          5004:     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
        !          5005:     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
        !          5006:         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
        !          5007:         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
        !          5008:         while ( (sbits64) rem1 < 0 ) {
        !          5009:             --zSig1;
        !          5010:             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
        !          5011:         }
                                 /* Any nonzero remainder is recorded in the lowest bit of zSig1. */
        !          5012:         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
        !          5013:     }
                             /* Shift the 128-bit quotient right by 15; shifted-out bits land in zSig2. */
        !          5014:     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
        !          5015:     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
        !          5016:
        !          5017: }
        !          5018:
        !          5019: /*
        !          5020: -------------------------------------------------------------------------------
        !          5021: Returns the remainder of the quadruple-precision floating-point value `a'
        !          5022: with respect to the corresponding value `b'.  The operation is performed
        !          5023: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          5024: -------------------------------------------------------------------------------
        !          5025: */
        !          5026: float128 float128_rem( float128 a, float128 b )
        !          5027: {
                             /*
                              * Outline: after the special cases, the significand of a is reduced
                              * against the significand of b in steps (one initial compare and
                              * subtract, then 61 exponent steps per loop pass, then one partial
                              * step).  A final subtract loop produces the last non-negative
                              * remainder and the first negative one, and the one of smaller
                              * magnitude is returned.  bSign is extracted but never used
                              * afterwards; the result sign is aSign, flipped when the negative
                              * candidate is chosen.
                              */
        !          5028:     flag aSign, bSign, zSign;
        !          5029:     int32 aExp, bExp, expDiff;
        !          5030:     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
        !          5031:     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
        !          5032:     sbits64 sigMean0;
        !          5033:     float128 z;
        !          5034:
        !          5035:     aSig1 = extractFloat128Frac1( a );
        !          5036:     aSig0 = extractFloat128Frac0( a );
        !          5037:     aExp = extractFloat128Exp( a );
        !          5038:     aSign = extractFloat128Sign( a );
        !          5039:     bSig1 = extractFloat128Frac1( b );
        !          5040:     bSig0 = extractFloat128Frac0( b );
        !          5041:     bExp = extractFloat128Exp( b );
        !          5042:     bSign = extractFloat128Sign( b );
                             /* a is NaN or infinity: propagate a NaN, otherwise the operation is invalid. */
        !          5043:     if ( aExp == 0x7FFF ) {
        !          5044:         if (    ( aSig0 | aSig1 )
        !          5045:              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
        !          5046:             return propagateFloat128NaN( a, b );
        !          5047:         }
        !          5048:         goto invalid;
        !          5049:     }
                             /* b is NaN or infinity while a is finite; with b infinite, a is returned unchanged. */
        !          5050:     if ( bExp == 0x7FFF ) {
        !          5051:         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        !          5052:         return a;
        !          5053:     }
        !          5054:     if ( bExp == 0 ) {
        !          5055:         if ( ( bSig0 | bSig1 ) == 0 ) {
                                     /* b is zero (also reached by goto when a is infinite). */
        !          5056:  invalid:
        !          5057:             float_raise( float_flag_invalid );
        !          5058:             z.low = float128_default_nan_low;
        !          5059:             z.high = float128_default_nan_high;
        !          5060:             return z;
        !          5061:         }
        !          5062:         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
        !          5063:     }
        !          5064:     if ( aExp == 0 ) {
        !          5065:         if ( ( aSig0 | aSig1 ) == 0 ) return a;
        !          5066:         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
        !          5067:     }
        !          5068:     expDiff = aExp - bExp;
                             /* a's exponent is at least two below b's: a is returned as the remainder. */
        !          5069:     if ( expDiff < -1 ) return a;
                             /*
                              * Set bit 48 of each upper word and shift left; a is shifted one
                              * place less (14 instead of 15) when expDiff is -1.
                              */
        !          5070:     shortShift128Left(
        !          5071:         aSig0 | LIT64( 0x0001000000000000 ),
        !          5072:         aSig1,
        !          5073:         15 - ( expDiff < 0 ),
        !          5074:         &aSig0,
        !          5075:         &aSig1
        !          5076:     );
        !          5077:     shortShift128Left(
        !          5078:         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
                             /* q starts as 0 or 1: whether b fits into a once. */
        !          5079:     q = le128( bSig0, bSig1, aSig0, aSig1 );
        !          5080:     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
        !          5081:     expDiff -= 64;
                             /*
                              * Each pass estimates a 64-bit quotient, lowers it by 4 (clamped at
                              * 0), and subtracts q*b (shifted left 61) from a (shifted left 61),
                              * consuming 61 units of expDiff.
                              * NOTE(review): the "- 4" presumably guards against an
                              * over-estimate so the partial remainder stays non-negative --
                              * confirm against estimateDiv128To64()'s error bound.
                              */
        !          5082:     while ( 0 < expDiff ) {
        !          5083:         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        !          5084:         q = ( 4 < q ) ? q - 4 : 0;
        !          5085:         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        !          5086:         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        !          5087:         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        !          5088:         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        !          5089:         expDiff -= 61;
        !          5090:     }
                             /*
                              * Final partial step.  When expDiff > -64 the estimate is scaled
                              * down by -expDiff bits; b is shifted right 12 and a is aligned by
                              * the adjusted expDiff before q*b is subtracted.  Otherwise both
                              * significands are just shifted right 12 and q keeps its previous
                              * value.
                              */
        !          5091:     if ( -64 < expDiff ) {
        !          5092:         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        !          5093:         q = ( 4 < q ) ? q - 4 : 0;
        !          5094:         q >>= - expDiff;
        !          5095:         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        !          5096:         expDiff += 52;
        !          5097:         if ( expDiff < 0 ) {
        !          5098:             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        !          5099:         }
        !          5100:         else {
        !          5101:             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        !          5102:         }
        !          5103:         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        !          5104:         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
        !          5105:     }
        !          5106:     else {
        !          5107:         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        !          5108:         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        !          5109:     }
                             /*
                              * Subtract b until the remainder goes negative.  On exit,
                              * alternateASig holds the last non-negative remainder, aSig the
                              * first negative one, and q has been incremented once per subtract.
                              */
        !          5110:     do {
        !          5111:         alternateASig0 = aSig0;
        !          5112:         alternateASig1 = aSig1;
        !          5113:         ++q;
        !          5114:         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
        !          5115:     } while ( 0 <= (sbits64) aSig0 );
                             /*
                              * sigMean is the sum of the two candidates.  A negative sum means
                              * the negative candidate has the larger magnitude, so the
                              * non-negative one is kept; on an exact tie (sum zero) it is kept
                              * only when q is odd.
                              * NOTE(review): &sigMean0 is an sbits64 * here.  If add128()
                              * declares its output parameters as bits64 *, this is a
                              * pointer-type mismatch that newer compilers reject -- check the
                              * prototype and add a cast or retype sigMean0 if so.
                              */
        !          5116:     add128(
        !          5117:         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
        !          5118:     if (    ( sigMean0 < 0 )
        !          5119:          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        !          5120:         aSig0 = alternateASig0;
        !          5121:         aSig1 = alternateASig1;
        !          5122:     }
                             /* If the chosen remainder is negative, negate it and flip the result sign. */
        !          5123:     zSign = ( (sbits64) aSig0 < 0 );
        !          5124:     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
        !          5125:     return
        !          5126:         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
        !          5127:
        !          5128: }
        !          5129:
        !          5130: /*
        !          5131: -------------------------------------------------------------------------------
        !          5132: Returns the square root of the quadruple-precision floating-point value `a'.
        !          5133: The operation is performed according to the IEC/IEEE Standard for Binary
        !          5134: Floating-Point Arithmetic.
        !          5135: -------------------------------------------------------------------------------
        !          5136: */
        !          5137: float128 float128_sqrt( float128 a )
        !          5138: {
                             /*
                              * Outline: handle NaN, infinity, negative and zero inputs, then
                              * build the root as two 64-bit words.  zSig0 starts from
                              * estimateSqrt32() refined with estimateDiv128To64(); zSig1 is
                              * estimated from the remainder.  Each word is corrected downward
                              * while its remainder is negative.  The result is always packed
                              * with sign 0.
                              */
        !          5139:     flag aSign;
        !          5140:     int32 aExp, zExp;
        !          5141:     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
        !          5142:     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
        !          5143:     float128 z;
        !          5144:
        !          5145:     aSig1 = extractFloat128Frac1( a );
        !          5146:     aSig0 = extractFloat128Frac0( a );
        !          5147:     aExp = extractFloat128Exp( a );
        !          5148:     aSign = extractFloat128Sign( a );
                             /* NaN is propagated; +infinity is returned as is; -infinity is invalid. */
        !          5149:     if ( aExp == 0x7FFF ) {
        !          5150:         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
        !          5151:         if ( ! aSign ) return a;
        !          5152:         goto invalid;
        !          5153:     }
                             /* Negative input: a negative zero is returned unchanged, anything else is invalid. */
        !          5154:     if ( aSign ) {
        !          5155:         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
        !          5156:  invalid:
        !          5157:         float_raise( float_flag_invalid );
        !          5158:         z.low = float128_default_nan_low;
        !          5159:         z.high = float128_default_nan_high;
        !          5160:         return z;
        !          5161:     }
                             /* Positive zero returns zero; a subnormal is normalized first. */
        !          5162:     if ( aExp == 0 ) {
        !          5163:         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        !          5164:         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
        !          5165:     }
                             /* Halve the unbiased exponent and re-bias it. */
        !          5166:     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
        !          5167:     aSig0 |= LIT64( 0x0001000000000000 );
        !          5168:     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
                             /* The shift is 13 or 12 depending on the low bit of aExp (exponent parity). */
        !          5169:     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
        !          5170:     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
        !          5171:     doubleZSig0 = zSig0<<1;
                             /* rem = a - zSig0^2; step zSig0 down while that remainder is negative. */
        !          5172:     mul64To128( zSig0, zSig0, &term0, &term1 );
        !          5173:     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
        !          5174:     while ( (sbits64) rem0 < 0 ) {
        !          5175:         --zSig0;
        !          5176:         doubleZSig0 -= 2;
        !          5177:         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
        !          5178:     }
                             /*
                              * Second root word.  The exact remainder is computed only when the
                              * low 13 bits of the estimate are <= 5; an estimate of 0 is
                              * replaced by 1 before the correction.
                              * NOTE(review): presumably a larger value means the estimate's
                              * error cannot change the rounding decision -- confirm.
                              */
        !          5179:     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
        !          5180:     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        !          5181:         if ( zSig1 == 0 ) zSig1 = 1;
        !          5182:         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        !          5183:         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        !          5184:         mul64To128( zSig1, zSig1, &term2, &term3 );
        !          5185:         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        !          5186:         while ( (sbits64) rem1 < 0 ) {
        !          5187:             --zSig1;
        !          5188:             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
        !          5189:             term3 |= 1;
        !          5190:             term2 |= doubleZSig0;
        !          5191:             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        !          5192:         }
                                 /* Any nonzero remainder is recorded in the lowest bit of zSig1. */
        !          5193:         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
        !          5194:     }
                             /* Shift the 128-bit root right by 14; shifted-out bits land in zSig2. */
        !          5195:     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
        !          5196:     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
        !          5197:
        !          5198: }
        !          5199:
        !          5200: /*
        !          5201: -------------------------------------------------------------------------------
        !          5202: Returns 1 if the quadruple-precision floating-point value `a' is equal to
        !          5203: the corresponding value `b', and 0 otherwise.  The comparison is performed
        !          5204: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          5205: -------------------------------------------------------------------------------
        !          5206: */
        !          5207: flag float128_eq( float128 a, float128 b )
        !          5208: {
        !          5209:
                             /*
                              * Either operand is a NaN (exponent 0x7FFF with a nonzero
                              * fraction): the result is 0.  The invalid flag is raised only if
                              * one of the operands is a signaling NaN.
                              */
        !          5210:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5211:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5212:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5213:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5214:        ) {
        !          5215:         if (    float128_is_signaling_nan( a )
        !          5216:              || float128_is_signaling_nan( b ) ) {
        !          5217:             float_raise( float_flag_invalid );
        !          5218:         }
        !          5219:         return 0;
        !          5220:     }
                             /*
                              * Equal when the bit patterns match, or when both low words are
                              * zero and both high words are zero once their top bit is shifted
                              * out, so that zeros of opposite sign compare equal.
                              */
        !          5221:     return
        !          5222:            ( a.low == b.low )
        !          5223:         && (    ( a.high == b.high )
        !          5224:              || (    ( a.low == 0 )
        !          5225:                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
        !          5226:            );
        !          5227:
        !          5228: }
        !          5229:
        !          5230: /*
        !          5231: -------------------------------------------------------------------------------
        !          5232: Returns 1 if the quadruple-precision floating-point value `a' is less than
        !          5233: or equal to the corresponding value `b', and 0 otherwise.  The comparison
        !          5234: is performed according to the IEC/IEEE Standard for Binary Floating-Point
        !          5235: Arithmetic.
        !          5236: -------------------------------------------------------------------------------
        !          5237: */
        !          5238: flag float128_le( float128 a, float128 b )
        !          5239: {
        !          5240:     flag aSign, bSign;
        !          5241:
                             /*
                              * Either operand is a NaN: raise invalid unconditionally (quiet
                              * NaNs included) and return 0.
                              */
        !          5242:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5243:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5244:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5245:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5246:        ) {
        !          5247:         float_raise( float_flag_invalid );
        !          5248:         return 0;
        !          5249:     }
        !          5250:     aSign = extractFloat128Sign( a );
        !          5251:     bSign = extractFloat128Sign( b );
                             /*
                              * Signs differ: a <= b when a is the negative one, or when both
                              * operands are zero (all bits clear apart from the sign bit).
                              */
        !          5252:     if ( aSign != bSign ) {
        !          5253:         return
        !          5254:                aSign
        !          5255:             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          5256:                  == 0 );
        !          5257:     }
                             /*
                              * Same sign: compare the raw 128-bit patterns as unsigned
                              * integers, with the operands swapped when both are negative.
                              */
        !          5258:     return
        !          5259:           aSign ? le128( b.high, b.low, a.high, a.low )
        !          5260:         : le128( a.high, a.low, b.high, b.low );
        !          5261:
        !          5262: }
        !          5263:
        !          5264: /*
        !          5265: -------------------------------------------------------------------------------
        !          5266: Returns 1 if the quadruple-precision floating-point value `a' is less than
        !          5267: the corresponding value `b', and 0 otherwise.  The comparison is performed
        !          5268: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          5269: -------------------------------------------------------------------------------
        !          5270: */
        !          5271: flag float128_lt( float128 a, float128 b )
        !          5272: {
        !          5273:     flag aSign, bSign;
        !          5274:
                             /*
                              * Either operand is a NaN: raise invalid unconditionally (quiet
                              * NaNs included) and return 0.
                              */
        !          5275:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5276:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5277:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5278:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5279:        ) {
        !          5280:         float_raise( float_flag_invalid );
        !          5281:         return 0;
        !          5282:     }
        !          5283:     aSign = extractFloat128Sign( a );
        !          5284:     bSign = extractFloat128Sign( b );
                             /*
                              * Signs differ: a < b only when a is the negative one and the
                              * operands are not both zero.
                              */
        !          5285:     if ( aSign != bSign ) {
        !          5286:         return
        !          5287:                aSign
        !          5288:             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          5289:                  != 0 );
        !          5290:     }
                             /*
                              * Same sign: compare the raw 128-bit patterns as unsigned
                              * integers, with the operands swapped when both are negative.
                              */
        !          5291:     return
        !          5292:           aSign ? lt128( b.high, b.low, a.high, a.low )
        !          5293:         : lt128( a.high, a.low, b.high, b.low );
        !          5294:
        !          5295: }
        !          5296:
        !          5297: /*
        !          5298: -------------------------------------------------------------------------------
        !          5299: Returns 1 if the quadruple-precision floating-point value `a' is equal to
        !          5300: the corresponding value `b', and 0 otherwise.  The invalid exception is
        !          5301: raised if either operand is a NaN.  Otherwise, the comparison is performed
        !          5302: according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          5303: -------------------------------------------------------------------------------
        !          5304: */
        !          5305: flag float128_eq_signaling( float128 a, float128 b )
        !          5306: {
        !          5307:
                             /*
                              * Either operand is a NaN: raise invalid unconditionally (quiet
                              * NaNs included) and return 0.
                              */
        !          5308:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5309:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5310:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5311:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5312:        ) {
        !          5313:         float_raise( float_flag_invalid );
        !          5314:         return 0;
        !          5315:     }
                             /*
                              * Equal when the bit patterns match, or when both low words are
                              * zero and both high words are zero once their top bit is shifted
                              * out, so that zeros of opposite sign compare equal.
                              */
        !          5316:     return
        !          5317:            ( a.low == b.low )
        !          5318:         && (    ( a.high == b.high )
        !          5319:              || (    ( a.low == 0 )
        !          5320:                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
        !          5321:            );
        !          5322:
        !          5323: }
        !          5324:
        !          5325: /*
        !          5326: -------------------------------------------------------------------------------
        !          5327: Returns 1 if the quadruple-precision floating-point value `a' is less than
        !          5328: or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
        !          5329: cause an exception.  Otherwise, the comparison is performed according to the
        !          5330: IEC/IEEE Standard for Binary Floating-Point Arithmetic.
        !          5331: -------------------------------------------------------------------------------
        !          5332: */
        !          5333: flag float128_le_quiet( float128 a, float128 b )
        !          5334: {
        !          5335:     flag aSign, bSign;
        !          5336:
                             /*
                              * Either operand is a NaN: the result is 0.  The invalid flag is
                              * raised only if one of the operands is a signaling NaN.
                              */
        !          5337:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5338:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5339:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5340:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5341:        ) {
        !          5342:         if (    float128_is_signaling_nan( a )
        !          5343:              || float128_is_signaling_nan( b ) ) {
        !          5344:             float_raise( float_flag_invalid );
        !          5345:         }
        !          5346:         return 0;
        !          5347:     }
        !          5348:     aSign = extractFloat128Sign( a );
        !          5349:     bSign = extractFloat128Sign( b );
                             /*
                              * Signs differ: a <= b when a is the negative one, or when both
                              * operands are zero (all bits clear apart from the sign bit).
                              */
        !          5350:     if ( aSign != bSign ) {
        !          5351:         return
        !          5352:                aSign
        !          5353:             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          5354:                  == 0 );
        !          5355:     }
                             /*
                              * Same sign: compare the raw 128-bit patterns as unsigned
                              * integers, with the operands swapped when both are negative.
                              */
        !          5356:     return
        !          5357:           aSign ? le128( b.high, b.low, a.high, a.low )
        !          5358:         : le128( a.high, a.low, b.high, b.low );
        !          5359:
        !          5360: }
        !          5361:
        !          5362: /*
        !          5363: -------------------------------------------------------------------------------
        !          5364: Returns 1 if the quadruple-precision floating-point value `a' is less than
        !          5365: the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
        !          5366: exception.  Otherwise, the comparison is performed according to the IEC/IEEE
        !          5367: Standard for Binary Floating-Point Arithmetic.
        !          5368: -------------------------------------------------------------------------------
        !          5369: */
        !          5370: flag float128_lt_quiet( float128 a, float128 b )
        !          5371: {
        !          5372:     flag aSign, bSign;
        !          5373:
                             /*
                              * Either operand is a NaN: the result is 0.  The invalid flag is
                              * raised only if one of the operands is a signaling NaN.
                              */
        !          5374:     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
        !          5375:               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
        !          5376:          || (    ( extractFloat128Exp( b ) == 0x7FFF )
        !          5377:               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        !          5378:        ) {
        !          5379:         if (    float128_is_signaling_nan( a )
        !          5380:              || float128_is_signaling_nan( b ) ) {
        !          5381:             float_raise( float_flag_invalid );
        !          5382:         }
        !          5383:         return 0;
        !          5384:     }
        !          5385:     aSign = extractFloat128Sign( a );
        !          5386:     bSign = extractFloat128Sign( b );
                             /*
                              * Signs differ: a < b only when a is the negative one and the
                              * operands are not both zero.
                              */
        !          5387:     if ( aSign != bSign ) {
        !          5388:         return
        !          5389:                aSign
        !          5390:             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
        !          5391:                  != 0 );
        !          5392:     }
                             /*
                              * Same sign: compare the raw 128-bit patterns as unsigned
                              * integers, with the operands swapped when both are negative.
                              */
        !          5393:     return
        !          5394:           aSign ? lt128( b.high, b.low, a.high, a.low )
        !          5395:         : lt128( a.high, a.low, b.high, b.low );
        !          5396:
        !          5397: }
        !          5398:
        !          5399: #endif
        !          5400:
        !          5401:
        !          5402: #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
        !          5403:
        !          5404: /*
        !          5405:  * These two routines are not part of the original softfloat distribution.
        !          5406:  *
        !          5407:  * They are based on the corresponding conversions to integer but return
        !          5408:  * unsigned numbers instead since these functions are required by GCC.
        !          5409:  *
        !          5410:  * Added by Mark Brinicombe <mark@netbsd.org>  27/09/97
        !          5411:  *
        !          5412:  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
        !          5413:  */
        !          5414:
        !          5415: /*
        !          5416: -------------------------------------------------------------------------------
        !          5417: Returns the result of converting the double-precision floating-point value
        !          5418: `a' to the 32-bit unsigned integer format.  The conversion is
        !          5419: performed according to the IEC/IEEE Standard for Binary Floating-point
        !          5420: Arithmetic, except that the conversion is always rounded toward zero.  If
        !          5421: `a' is a NaN, the largest positive integer is returned.  If the conversion
        !          5422: overflows, the largest integer positive is returned.
        !          5423: -------------------------------------------------------------------------------
        !          5424: */
        !          5425: uint32 float64_to_uint32_round_to_zero( float64 a )
        !          5426: {
        !          5427:     flag aSign;
        !          5428:     int16 aExp, shiftCount;
        !          5429:     bits64 aSig, savedASig;
        !          5430:     uint32 z;
        !          5431:
        !          5432:     aSig = extractFloat64Frac( a );
        !          5433:     aExp = extractFloat64Exp( a );
        !          5434:     aSign = extractFloat64Sign( a );
        !          5435:
        !          5436:     if (aSign) {
        !          5437:         float_raise( float_flag_invalid );
        !          5438:        return(0);
        !          5439:     }
        !          5440:
        !          5441:     if ( 0x41E < aExp ) {
        !          5442:         float_raise( float_flag_invalid );
        !          5443:         return 0xffffffff;
        !          5444:     }
        !          5445:     else if ( aExp < 0x3FF ) {
        !          5446:         if ( aExp || aSig ) float_set_inexact();
        !          5447:         return 0;
        !          5448:     }
        !          5449:     aSig |= LIT64( 0x0010000000000000 );
        !          5450:     shiftCount = 0x433 - aExp;
        !          5451:     savedASig = aSig;
        !          5452:     aSig >>= shiftCount;
        !          5453:     z = aSig;
        !          5454:     if ( ( aSig<<shiftCount ) != savedASig ) {
        !          5455:         float_set_inexact();
        !          5456:     }
        !          5457:     return z;
        !          5458:
        !          5459: }
        !          5460:
        !          5461: /*
        !          5462: -------------------------------------------------------------------------------
        !          5463: Returns the result of converting the single-precision floating-point value
        !          5464: `a' to the 32-bit unsigned integer format.  The conversion is
        !          5465: performed according to the IEC/IEEE Standard for Binary Floating-point
        !          5466: Arithmetic, except that the conversion is always rounded toward zero.  If
        !          5467: `a' is a NaN, the largest positive integer is returned.  If the conversion
        !          5468: overflows, the largest positive integer is returned.
        !          5469: -------------------------------------------------------------------------------
        !          5470: */
        !          5471: uint32 float32_to_uint32_round_to_zero( float32 a )
        !          5472: {
        !          5473:     flag aSign;
        !          5474:     int16 aExp, shiftCount;
        !          5475:     bits32 aSig;
        !          5476:     uint32 z;
        !          5477:
        !          5478:     aSig = extractFloat32Frac( a );
        !          5479:     aExp = extractFloat32Exp( a );
        !          5480:     aSign = extractFloat32Sign( a );
        !          5481:     shiftCount = aExp - 0x9E;
        !          5482:
        !          5483:     if (aSign) {
        !          5484:         float_raise( float_flag_invalid );
        !          5485:        return(0);
        !          5486:     }
        !          5487:     if ( 0 < shiftCount ) {
        !          5488:         float_raise( float_flag_invalid );
        !          5489:         return 0xFFFFFFFF;
        !          5490:     }
        !          5491:     else if ( aExp <= 0x7E ) {
        !          5492:         if ( aExp | aSig ) float_set_inexact();
        !          5493:         return 0;
        !          5494:     }
        !          5495:     aSig = ( aSig | 0x800000 )<<8;
        !          5496:     z = aSig>>( - shiftCount );
        !          5497:     if ( aSig<<( shiftCount & 31 ) ) {
        !          5498:         float_set_inexact();
        !          5499:     }
        !          5500:     return z;
        !          5501:
        !          5502: }
        !          5503:
        !          5504: #endif
        !          5505:
        !          5506: #endif /* !NO_IEEE */
CVSweb