[BACK]Return to fp_complete.c CVS log [TXT][DIR] Up to [local] / sys / arch / alpha / alpha

Annotation of sys/arch/alpha/alpha/fp_complete.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: fp_complete.c,v 1.7 2006/02/25 03:58:56 deraadt Exp $ */
                      2: /*     $NetBSD: fp_complete.c,v 1.5 2002/01/18 22:15:56 ross Exp $     */
                      3:
                      4: /*-
                      5:  * Copyright (c) 2001 Ross Harvey
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  * 3. All advertising materials mentioning features or use of this software
                     17:  *    must display the following acknowledgement:
                     18:  *     This product includes software developed by the NetBSD
                     19:  *     Foundation, Inc. and its contributors.
                     20:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     21:  *    contributors may be used to endorse or promote products derived
                     22:  *    from this software without specific prior written permission.
                     23:  *
                     24:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     25:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     26:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     27:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     28:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     29:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     30:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     31:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     32:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     33:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     34:  * POSSIBILITY OF SUCH DAMAGE.
                     35:  */
                     36:
                     37: #include <sys/param.h>
                     38: #include <sys/systm.h>
                     39: #include <sys/proc.h>
                     40:
                     41: #include <machine/cpu.h>
                     42: #include <machine/fpu.h>
                     43: #include <machine/reg.h>
                     44: #include <machine/cpu.h>
                     45: #include <alpha/alpha/db_instruction.h>
                     46:
                     47: #include <lib/libkern/softfloat.h>
                     48:
                     49: #define        TSWINSIZE 4     /* size of trap shadow window in u_int32_t units */
                     50:
                     51: /*     Set Name                Opcodes                 AARM C.* Symbols  */
                     52:
                     53: #define        CPUREG_CLASS            (0xfUL << 0x10)         /* INT[ALSM]      */
                     54: #define        FPUREG_CLASS            (0xfUL << 0x14)         /* ITFP, FLT[ILV] */
                     55: #define        CHECKFUNCTIONCODE       (1UL << 0x18)           /* MISC           */
                     56: #define        TRAPSHADOWBOUNDARY      (1UL << 0x00 |          /* PAL            */\
                     57:                                 1UL << 0x19 |          /* \PAL\          */\
                     58:                                 1UL << 0x1a |          /* JSR            */\
                     59:                                 1UL << 0x1b |          /* \PAL\          */\
                     60:                                 1UL << 0x1d |          /* \PAL\          */\
                     61:                                 1UL << 0x1e |          /* \PAL\          */\
                     62:                                 1UL << 0x1f |          /* \PAL\          */\
                     63:                                 0xffffUL << 0x30 |     /* branch ops     */\
                     64:                                 CHECKFUNCTIONCODE)
                     65:
                     66: #define        MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
                     67:        (u_int ## width ## _t)(sign) << ((width) - 1)                   |\
                     68:        (u_int ## width ## _t)(exp)  << ((width) - 1 - (expwidth))      |\
                     69:        (u_int ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)  |\
                     70:        (u_int ## width ## _t)(rest_of_frac)
                     71:
                     72: #define        FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
                     73: #define        FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)
                     74:
                     75: #define IS_SUBNORMAL(v)        ((v)->exp == 0 && (v)->frac != 0)
                     76:
                     77: #define        PREFILTER_SUBNORMAL(p,v) if ((p)->p_md.md_flags & IEEE_MAP_DMZ  \
                     78:                                     && IS_SUBNORMAL(v))                \
                     79:                                         (v)->frac = 0; else
                     80:
                     81: #define        POSTFILTER_SUBNORMAL(p,v) if ((p)->p_md.md_flags & IEEE_MAP_UMZ \
                     82:                                      && IS_SUBNORMAL(v))               \
                     83:                                          (v)->frac = 0; else
                     84:
                     85:        /* Alpha returns 2.0 for true, all zeroes for false. */
                     86:
                     87: #define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)
                     88:
                     89:        /* Move bits from sw fp_c to hw fpcr. */
                     90:
                     91: #define        CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
                     92:
                     93: /*
                     94:  * Temporary trap shadow instrumentation. The [un]resolved counters
                     95:  * could be kept permanently, as they provide information on whether
                     96:  * user code has met AARM trap shadow generation requirements.
                     97:  */
                     98:
                     99: struct alpha_shadow {
                    100:        u_int64_t resolved;     /* cases trigger pc found */
                    101:        u_int64_t unresolved;   /* cases it wasn't, code problems? */
                    102:        u_int64_t scans;                /* trap shadow scans */
                    103:        u_int64_t len;          /* number of instructions examined */
                    104:        u_int64_t uop;          /* bit mask of unexpected opcodes */
                    105:        u_int64_t sqrts;        /* ev6+ square root single count */
                    106:        u_int64_t sqrtt;        /* ev6+ square root double count */
                    107:        u_int32_t ufunc;        /* bit mask of unexpected functions */
                    108:        u_int32_t max;          /* max trap shadow scan */
                    109:        u_int32_t nilswop;      /* unexpected op codes */
                    110:        u_int32_t nilswfunc;    /* unexpected function codes */
                    111:        u_int32_t nilanyop;     /* this "cannot happen" */
                    112:        u_int32_t vax;          /* sigs from vax fp opcodes */
                    113: } alpha_shadow, alpha_shadow_zero;
                    114:
                    115: static float64 float64_unk(float64, float64);
                    116: static float64 compare_un(float64, float64);
                    117: static float64 compare_eq(float64, float64);
                    118: static float64 compare_lt(float64, float64);
                    119: static float64 compare_le(float64, float64);
                    120: static void cvt_qs_ts_st_gf_qf(u_int32_t, struct proc *);
                    121: static void cvt_gd(u_int32_t, struct proc *);
                    122: static void cvt_qt_dg_qg(u_int32_t, struct proc *);
                    123: static void cvt_tq_gq(u_int32_t, struct proc *);
                    124:
                    125: static float32 (*swfp_s[])(float32, float32) = {
                    126:        float32_add, float32_sub, float32_mul, float32_div,
                    127: };
                    128:
                    129: static float64 (*swfp_t[])(float64, float64) = {
                    130:        float64_add, float64_sub, float64_mul, float64_div,
                    131:        compare_un,    compare_eq,    compare_lt,    compare_le,
                    132:        float64_unk, float64_unk, float64_unk, float64_unk
                    133: };
                    134:
                    135: static void (*swfp_cvt[])(u_int32_t, struct proc *) = {
                    136:        cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
                    137: };
                    138:
                    139: static void
                    140: this_cannot_happen(int what_cannot_happen, int64_t bits)
                    141: {
                    142:        static int total;
                    143:        alpha_instruction inst;
                    144:        static u_int64_t reported;
                    145:
                    146:        inst.bits = bits;
                    147:        ++alpha_shadow.nilswfunc;
                    148:        if (bits != -1)
                    149:                alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
                    150:        if (1UL << what_cannot_happen & reported)
                    151:                return;
                    152:        reported |= 1UL << what_cannot_happen;
                    153:        if (total >= 1000)
                    154:                return; /* right now, this return "cannot happen" */
                    155:        ++total;
                    156:        if (bits)
                    157:                printf("FP instruction %x\n", (unsigned int)bits);
                    158:        printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
                    159:            alpha_shadow.uop);
                    160: }
                    161:
/*
 * Fetch an S-format source operand: alpha_sts() fills *v from FP
 * register rn, then PREFILTER_SUBNORMAL clears the fraction of a
 * subnormal value when the process has IEEE_MAP_DMZ set.
 */
static __inline void
sts(unsigned int rn, s_float *v, struct proc *p)
{
	alpha_sts(rn, v);
	PREFILTER_SUBNORMAL(p, v);
}
                    168:
/*
 * Fetch a T-format source operand: alpha_stt() fills *v from FP
 * register rn, then PREFILTER_SUBNORMAL clears the fraction of a
 * subnormal value when the process has IEEE_MAP_DMZ set.
 */
static __inline void
stt(unsigned int rn, t_float *v, struct proc *p)
{
	alpha_stt(rn, v);
	PREFILTER_SUBNORMAL(p, v);
}
                    175:
/*
 * Deliver an S-format result: POSTFILTER_SUBNORMAL first clears the
 * fraction of a subnormal *v when the process has IEEE_MAP_UMZ set,
 * then alpha_lds() hands *v to FP register rn.
 */
static __inline void
lds(unsigned int rn, s_float *v, struct proc *p)
{
	POSTFILTER_SUBNORMAL(p, v);
	alpha_lds(rn, v);
}
                    182:
/*
 * Deliver a T-format result: POSTFILTER_SUBNORMAL first clears the
 * fraction of a subnormal *v when the process has IEEE_MAP_UMZ set,
 * then alpha_ldt() hands *v to FP register rn.
 */
static __inline void
ldt(unsigned int rn, t_float *v, struct proc *p)
{
	POSTFILTER_SUBNORMAL(p, v);
	alpha_ldt(rn, v);
}
                    189:
                    190: static float64
                    191: compare_lt(float64 a, float64 b)
                    192: {
                    193:        return CMP_RESULT(float64_lt(a, b));
                    194: }
                    195:
                    196: static float64
                    197: compare_le(float64 a, float64 b)
                    198: {
                    199:        return CMP_RESULT(float64_le(a, b));
                    200: }
                    201:
                    202: static float64
                    203: compare_un(float64 a, float64 b)
                    204: {
                    205:        if (float64_is_nan(a) | float64_is_nan(b)) {
                    206:                if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
                    207:                        float_set_invalid();
                    208:                return CMP_RESULT(1);
                    209:        }
                    210:        return CMP_RESULT(0);
                    211: }
                    212:
                    213: static float64
                    214: compare_eq(float64 a, float64 b)
                    215: {
                    216:        return CMP_RESULT(float64_eq(a, b));
                    217: }
                    218: /*
                    219:  * A note regarding the VAX FP ops.
                    220:  *
                    221:  * The AARM gives us complete leeway to set or not set status flags on VAX
                    222:  * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
                    223:  * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
                    224:  * For the purely vax ones, it's hard to imagine ever running them.
                    225:  * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
                    226:  * to panic, assert, or print unlimited output based on a path through the
                    227:  * decoder, so weird cases don't become security issues.
                    228:  */
                    229: static void
                    230: cvt_qs_ts_st_gf_qf(u_int32_t inst_bits, struct proc *p)
                    231: {
                    232:        t_float tfb, tfc;
                    233:        s_float sfb, sfc;
                    234:        alpha_instruction inst;
                    235:
                    236:        inst.bits = inst_bits;
                    237:        /*
                    238:         * cvtst and cvtts have the same opcode, function, and source.  The
                    239:         * distinction for cvtst is hidden in the illegal modifier combinations.
                    240:         * We decode even the non-/s modifier, so that the fix-up-always mode
                    241:         * works on ev6 and later. The rounding bits are unused and fixed for
                    242:         * cvtst, so we check those too.
                    243:         */
                    244:        switch(inst.float_format.function) {
                    245:        case op_cvtst:
                    246:        case op_cvtst_u:
                    247:                sts(inst.float_detail.fb, &sfb, p);
                    248:                tfc.i = float32_to_float64(sfb.i);
                    249:                ldt(inst.float_detail.fc, &tfc, p);
                    250:                return;
                    251:        }
                    252:        if(inst.float_detail.src == 2) {
                    253:                stt(inst.float_detail.fb, &tfb, p);
                    254:                sfc.i = float64_to_float32(tfb.i);
                    255:                lds(inst.float_detail.fc, &sfc, p);
                    256:                return;
                    257:        }
                    258:        /* 0: S/F */
                    259:        /* 1:  /D */
                    260:        /* 3: Q/Q */
                    261:        this_cannot_happen(5, inst.generic_format.opcode);
                    262:        tfc.i = FLOAT64QNAN;
                    263:        ldt(inst.float_detail.fc, &tfc, p);
                    264:        return;
                    265: }
                    266:
                    267: static void
                    268: cvt_gd(u_int32_t inst_bits, struct proc *p)
                    269: {
                    270:        t_float tfb, tfc;
                    271:        alpha_instruction inst;
                    272:
                    273:        inst.bits = inst_bits;
                    274:        stt(inst.float_detail.fb, &tfb, p);
                    275:        (void) float64_to_float32(tfb.i);
                    276:        p->p_md.md_flags &= ~OPENBSD_FLAG_TO_FP_C(FP_X_IMP);
                    277:        tfc.i = float64_add(tfb.i, (float64)0);
                    278:        ldt(inst.float_detail.fc, &tfc, p);
                    279: }
                    280:
                    281: static void
                    282: cvt_qt_dg_qg(u_int32_t inst_bits, struct proc *p)
                    283: {
                    284:        t_float tfb, tfc;
                    285:        alpha_instruction inst;
                    286:
                    287:        inst.bits = inst_bits;
                    288:        switch(inst.float_detail.src) {
                    289:        case 0: /* S/F */
                    290:                this_cannot_happen(3, inst.bits);
                    291:                /* fall thru */
                    292:        case 1: /* D */
                    293:                /* VAX dirty 0's and reserved ops => UNPREDICTABLE */
                    294:                /* We've done what's important by just not trapping */
                    295:                tfc.i = 0;
                    296:                break;
                    297:        case 2: /* T/G */
                    298:                this_cannot_happen(4, inst.bits);
                    299:                tfc.i = 0;
                    300:                break;
                    301:        case 3: /* Q/Q */
                    302:                stt(inst.float_detail.fb, &tfb, p);
                    303:                tfc.i = int64_to_float64(tfb.i);
                    304:                break;
                    305:        }
                    306:        alpha_ldt(inst.float_detail.fc, &tfc);
                    307: }
                    308: /*
                    309:  * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
                    310:  *      unfortunate habit of always returning the nontrapping result.
                    311:  * XXX: there are several apparent AARM/AAH disagreements, as well as
                    312:  *      the issue of trap handler pc and trapping results.
                    313:  */
                    314: static void
                    315: cvt_tq_gq(u_int32_t inst_bits, struct proc *p)
                    316: {
                    317:        t_float tfb, tfc;
                    318:        alpha_instruction inst;
                    319:
                    320:        inst.bits = inst_bits;
                    321:        stt(inst.float_detail.fb, &tfb, p);
                    322:        tfc.i = float64_to_int64(tfb.i);
                    323:        alpha_ldt(inst.float_detail.fc, &tfc);  /* yes, ldt */
                    324: }
                    325:
                    326: static u_int64_t
                    327: fp_c_to_fpcr_1(u_int64_t fpcr, u_int64_t fp_c)
                    328: {
                    329:        u_int64_t disables;
                    330:
                    331:        /*
                    332:         * It's hard to arrange for conforming bit fields, because the FP_C
                    333:         * and the FPCR are both architected, with specified (and relatively
                    334:         * scrambled) bit numbers. Defining an internal unscrambled FP_C
                    335:         * wouldn't help much, because every user exception requires the
                    336:         * architected bit order in the sigcontext.
                    337:         *
                    338:         * Programs that fiddle with the fpcr exception bits (instead of fp_c)
                    339:         * will lose, because those bits can be and usually are subsetted;
                    340:         * the official home is in the fp_c. Furthermore, the kernel puts
                    341:         * phony enables (it lies :-) in the fpcr in order to get control when
                    342:         * it is necessary to initially set a sticky bit.
                    343:         */
                    344:
                    345:        fpcr &= FPCR_DYN(3);
                    346:
                    347:        /*
                    348:         * enable traps = case where flag bit is clear OR program wants a trap
                    349:         * enables = ~flags | mask
                    350:         * disables = ~(~flags | mask)
                    351:         * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
                    352:         */
                    353:        disables = FP_C_TO_OPENBSD_FLAG(fp_c) & ~FP_C_TO_OPENBSD_MASK(fp_c);
                    354:
                    355:        fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
                    356:        fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
                    357:
                    358: #      if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&          \
                    359:            FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&        \
                    360:            FP_X_UFL << (61 - 3) == FPCR_UNFD &&                        \
                    361:            FP_X_IMP << (61 - 3) == FPCR_INED &&                        \
                    362:            FP_X_OFL << (49 - 0) == FPCR_OVFD)
                    363: #              error "Assertion failed"
                    364:        /*
                    365:         * We don't care about the other built-in bit numbers because they
                    366:         * have been architecturally specified.
                    367:         */
                    368: #      endif
                    369:
                    370:        fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
                    371:        fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
                    372:        if (fp_c & FP_C_MIRRORED)
                    373:                fpcr |= FPCR_SUM;
                    374:        if (fp_c & IEEE_MAP_UMZ)
                    375:                fpcr |= FPCR_UNDZ | FPCR_UNFD;
                    376:        fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
                    377:        return fpcr;
                    378: }
                    379:
                    380: static void
                    381: fp_c_to_fpcr(struct proc *p)
                    382: {
                    383:        alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), p->p_md.md_flags));
                    384: }
                    385:
                    386: void
                    387: alpha_write_fp_c(struct proc *p, u_int64_t fp_c)
                    388: {
                    389:        u_int64_t md_flags;
                    390:
                    391:        fp_c &= MDP_FP_C;
                    392:        md_flags = p->p_md.md_flags;
                    393:        if ((md_flags & MDP_FP_C) == fp_c)
                    394:                return;
                    395:        p->p_md.md_flags = (md_flags & ~MDP_FP_C) | fp_c;
                    396:        alpha_enable_fp(p, 1);
                    397:        fp_c_to_fpcr(p);
                    398:        alpha_pal_wrfen(0);
                    399: }
                    400:
                    401: u_int64_t
                    402: alpha_read_fp_c(struct proc *p)
                    403: {
                    404:        /*
                    405:         * A possibly desirable EV6-specific optimization would deviate from
                    406:         * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
                    407:         * but in a transparent way. Some of the code for that would need to
                    408:         * go right here.
                    409:         */
                    410:        return p->p_md.md_flags & MDP_FP_C;
                    411: }
                    412:
                    413: static float64
                    414: float64_unk(float64 a, float64 b)
                    415: {
                    416:        return 0;
                    417: }
                    418:
                    419: /*
                    420:  * The real function field encodings for IEEE and VAX FP instructions.
                    421:  *
                    422:  * Since there is only one operand type field, the cvtXX instructions
                    423:  * require a variety of special cases, and these have to be analyzed as
                    424:  * they don't always fit into the field descriptions in AARM section I.
                    425:  *
                    426:  * Lots of staring at bits in the appendix shows what's really going on.
                    427:  *
                    428:  *        |           |
                    429:  * 15 14 13|12 11 10 09|08 07 06 05
                    430:  * --------======------============
                    431:  *  TRAP   : RND : SRC : FUNCTION  :
                    432:  *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
                    433:  *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
                     434:  *        |                             /V overflow enable (if int output)
                    435:  *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
                    436:  *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
                    437:  *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
                    438:  *  1  0  1|. . .:. . . . . . . . . . . ./SU
                    439:  *        |                             /SV
                    440:  *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
                    441:  *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)   (IEEE only)
                    442:  *        |                             /SVI (if int output)   (IEEE only)
                    443:  *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
                    444:  *        |  not all combinations are valid.
                    445:  *        |           |
                    446:  * 15 14 13|12 11 10 09|08 07 06 05
                    447:  * --------======------============
                    448:  *  TRAP   : RND : SRC : FUNCTION  :
                    449:  *        | 0  0 . . . . . . . . . . . ./C Chopped
                    450:  *        : 0  1 . . . . . . . . . . . ./M Minus Infinity
                    451:  *        | 1  0 . . . . . . . . . . . .   Normal
                    452:  *        : 1  1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
                    453:  *        |           |
                    454:  * 15 14 13|12 11 10 09|08 07 06 05
                    455:  * --------======------============
                    456:  *  TRAP   : RND : SRC : FUNCTION  :
                    457:  *                0 0. . . . . . . . . . S/F
                    458:  *                0 1. . . . . . . . . . -/D
                    459:  *                1 0. . . . . . . . . . T/G
                    460:  *                1 1. . . . . . . . . . Q/Q
                    461:  *        |           |
                    462:  * 15 14 13|12 11 10 09|08 07 06 05
                    463:  * --------======------============
                    464:  *  TRAP   : RND : SRC : FUNCTION  :
                    465:  *                      0  0  0  0 . . . addX
                    466:  *                      0  0  0  1 . . . subX
                    467:  *                      0  0  1  0 . . . mulX
                    468:  *                      0  0  1  1 . . . divX
                    469:  *                      0  1  0  0 . . . cmpXun
                    470:  *                      0  1  0  1 . . . cmpXeq
                    471:  *                      0  1  1  0 . . . cmpXlt
                    472:  *                      0  1  1  1 . . . cmpXle
                    473:  *                      1  0  0  0 . . . reserved
                    474:  *                      1  0  0  1 . . . reserved
                    475:  *                      1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
                    476:  *                      1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
                    477:  *                      1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
                    478:  *                      1  1  0  1 . . . cvtXd   (vax only)
                    479:  *                      1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
                    480:  *                      1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
                    481:  *        |           |
                    482:  * 15 14 13|12 11 10 09|08 07 06 05      the twilight zone
                    483:  * --------======------============
                    484:  *  TRAP   : RND : SRC : FUNCTION  :
                    485:  * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
                    486:  *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
                    487:  *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
                    488:  *  x  0  x  x  x  x  0         1  1  1  1 . . . cvttq/_ (src == T)
                    489:  */
                    490:
                    491: static void
                    492: alpha_fp_interpret(alpha_instruction *pc, struct proc *p, u_int64_t bits)
                    493: {
                                /*
                                 * Carry out in software the floating-point instruction whose
                                 * encoding is passed in "bits".  Dispatch is on the major opcode
                                 * first and then, for the VAX, IEEE and "fix" opcodes, on the
                                 * source-format field of the instruction.
                                 *
                                 * sts()/stt() are used to obtain the operand images and
                                 * lds()/ldt() to deliver the result; "p" is passed through to
                                 * those helpers and to the conversion handlers.  "pc" is not
                                 * referenced in this function.
                                 */
                    494:        s_float sfa, sfb, sfc;
                    495:        t_float tfa, tfb, tfc;
                    496:        alpha_instruction inst;
                    497:
                    498:        inst.bits = bits;
                    499:        switch(inst.generic_format.opcode) {
                    500:        default:
                    501:                /* this "cannot happen" */
                    502:                this_cannot_happen(2, inst.bits);
                    503:                return;
                    504:        case op_any_float:
                                        /*
                                         * Only the two cvtql variants tested below are handled
                                         * for this opcode: the result is forced to INT_MAX or
                                         * INT_MIN according to the sign of the 64-bit source
                                         * operand and FP_X_INV is raised.  Anything else is
                                         * counted in alpha_shadow.nilanyop and reported.
                                         */
                    505:                if (inst.float_format.function == op_cvtql_sv ||
                    506:                    inst.float_format.function == op_cvtql_v) {
                    507:                        alpha_stt(inst.float_detail.fb, &tfb);
                    508:                        sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
                    509:                        alpha_lds(inst.float_detail.fc, &sfc);
                    510:                        float_raise(FP_X_INV);
                    511:                } else {
                    512:                        ++alpha_shadow.nilanyop;
                    513:                        this_cannot_happen(3, inst.bits);
                    514:                }
                    515:                break;
                    516:        case op_vax_float:
                    517:                ++alpha_shadow.vax;     /* fall thru */
                    518:        case op_ieee_float:
                    519:        case op_fix_float:
                    520:                switch(inst.float_detail.src) {
                    521:                case op_src_sf:
                                                /*
                                                 * 32-bit source format.  Operand b is always
                                                 * fetched; opclass 10 is square root, opclass
                                                 * 0-3 index the two-operand table swfp_s, and
                                                 * every other opclass is reported and yields
                                                 * FLOAT32QNAN.  The result is stored to fc in
                                                 * all three cases.
                                                 */
                    522:                        sts(inst.float_detail.fb, &sfb, p);
                    523:                        if (inst.float_detail.opclass == 10)
                    524:                                sfc.i = float32_sqrt(sfb.i);
                    525:                        else if (inst.float_detail.opclass & ~3) {
                    526:                                this_cannot_happen(1, inst.bits);
                    527:                                sfc.i = FLOAT32QNAN;
                    528:                        } else {
                    529:                                sts(inst.float_detail.fa, &sfa, p);
                    530:                                sfc.i = (*swfp_s[inst.float_detail.opclass])(
                    531:                                    sfa.i, sfb.i);
                    532:                        }
                    533:                        lds(inst.float_detail.fc, &sfc, p);
                    534:                        break;
                    535:                case op_src_xd:
                    536:                case op_src_tg:
                                                /*
                                                 * 64-bit source formats.  Opclass 12 and above
                                                 * go to the swfp_cvt handlers, which receive the
                                                 * raw instruction bits and the proc; no operand
                                                 * fetch or result store is done here for them.
                                                 * Otherwise opclass 10 is square root and the
                                                 * remaining values index swfp_t directly (there
                                                 * is no range check on that index here).
                                                 */
                    537:                        if (inst.float_detail.opclass >= 12)
                    538:                                (*swfp_cvt[inst.float_detail.opclass - 12])(
                    539:                                    inst.bits, p);
                    540:                        else {
                    541:                                stt(inst.float_detail.fb, &tfb, p);
                    542:                                if (inst.float_detail.opclass == 10)
                    543:                                        tfc.i = float64_sqrt(tfb.i);
                    544:                                else {
                    545:                                        stt(inst.float_detail.fa, &tfa, p);
                    546:                                        tfc.i = (*swfp_t[inst.float_detail
                    547:                                            .opclass])(tfa.i, tfb.i);
                    548:                                }
                    549:                                ldt(inst.float_detail.fc, &tfc, p);
                    550:                        }
                    551:                        break;
                    552:                case op_src_qq:
                                                /* Nothing is emulated here; FP_X_IMP is raised. */
                    553:                        float_raise(FP_X_IMP);
                    554:                        break;
                    555:                }
                    556:        }
                    557: }
                    558:
                    559: static int
                    560: alpha_fp_complete_at(alpha_instruction *trigger_pc, struct proc *p,
                    561:     u_int64_t *ucode)
                    562: {
                                /*
                                 * Emulate the single instruction found at user address
                                 * trigger_pc on behalf of process p.
                                 *
                                 * Returns 0 when no signal is needed, SIGSEGV when the
                                 * instruction cannot be copied in from user space, and SIGFPE
                                 * when the emulation raised a new exception flag that is selected
                                 * by FP_C_TO_OPENBSD_MASK(); in the SIGFPE case *ucode receives
                                 * those flag bits.  *ucode is not written otherwise.
                                 */
                    563:        int needsig;
                    564:        alpha_instruction inst;
                    565:        u_int64_t rm, fpcr, orig_fpcr;
                    566:        u_int64_t orig_flags, new_flags, changed_flags, md_flags;
                    567:
                    568:        if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
                    569:                this_cannot_happen(6, -1);
                    570:                return SIGSEGV;
                    571:        }
                    572:        alpha_enable_fp(p, 1);
                    573:        /*
                    574:         * If necessary, lie about the dynamic rounding mode so emulation
                    575:         * software need go to only one place for it, and so we don't have to
                    576:         * lock any memory locations or pass a third parameter to every
                    577:         * SoftFloat entry point.
                    578:         */
                    579:        orig_fpcr = fpcr = alpha_read_fpcr();
                    580:        rm = inst.float_detail.rnd;
                                /*
                                 * Only touch the FPCR when the instruction names a fixed rounding
                                 * mode that differs from the one currently held in FPCR bits
                                 * 58-59.
                                 */
                    581:        if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
                    582:                fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
                    583:                alpha_write_fpcr(fpcr);
                    584:        }
                                /* Snapshot the flags so newly raised ones can be isolated below. */
                    585:        orig_flags = FP_C_TO_OPENBSD_FLAG(p->p_md.md_flags);
                    586:
                    587:        alpha_fp_interpret(trigger_pc, p, inst.bits);
                    588:
                    589:        md_flags = p->p_md.md_flags;
                    590:
                    591:        new_flags = FP_C_TO_OPENBSD_FLAG(md_flags);
                    592:        changed_flags = orig_flags ^ new_flags;
                    593:        KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
                                /*
                                 * The FPCR is rewritten from the value read before any rounding
                                 * mode override (orig_fpcr) combined with the current md_flags.
                                 */
                    594:        alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
                                /*
                                 * A signal is needed only for flags that changed during this
                                 * emulation and are selected by FP_C_TO_OPENBSD_MASK(md_flags).
                                 */
                    595:        needsig = changed_flags & FP_C_TO_OPENBSD_MASK(md_flags);
                                /*
                                 * NOTE(review): presumably turns FP access back off after the
                                 * alpha_enable_fp() call above -- confirm against the PALcode
                                 * wrfen definition.
                                 */
                    596:        alpha_pal_wrfen(0);
                    597:        if (__predict_false(needsig)) {
                    598:                *ucode = needsig;
                    599:                return SIGFPE;
                    600:        }
                    601:        return 0;
                    602: }
                    603:
                    604: int
                    605: alpha_fp_complete(u_long a0, u_long a1, struct proc *p, u_int64_t *ucode)
                    606: {
                                /*
                                 * Try to complete a trapped floating-point instruction for
                                 * process p in software.
                                 *
                                 * a0 and a1 are the trap arguments: bit 0 of a0 is tested as the
                                 * "software completion requested" indication, and a1 is consumed
                                 * as the mask of registers written by the trapping instructions
                                 * (bits 0-31 are cleared for CPUREG_CLASS destinations, bits 32-63
                                 * for FPUREG_CLASS destinations).
                                 *
                                 * Returns 0 when the instruction was emulated and no signal is
                                 * needed, otherwise the signal number to deliver.  *ucode is set
                                 * to a0 on the scanning path and may then be overwritten by
                                 * alpha_fp_complete_at().
                                 */
                    607:        int t;
                    608:        int sig;
                    609:        u_int64_t op_class;
                    610:        alpha_instruction inst;
                    611:        /* "trigger_pc" is Compaq's term for the earliest faulting op */
                    612:        alpha_instruction *trigger_pc, *usertrap_pc;
                    613:        alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
                    614:
                    615:        sig = SIGFPE;
                    616:        pc = (alpha_instruction *)p->p_md.md_tf->tf_regs[FRAME_PC];
                    617:        trigger_pc = pc - 1;    /* for ALPHA_AMASK_PAT case */
                                /*
                                 * When the CPU reports ALPHA_AMASK_PAT, the instruction just
                                 * before the trap PC is emulated directly, with no scan, provided
                                 * software completion was requested or alpha_fp_sync_complete is
                                 * set.
                                 */
                    618:        if (cpu_amask & ALPHA_AMASK_PAT) {
                    619:                if (a0 & 1 || alpha_fp_sync_complete) {
                    620:                        sig = alpha_fp_complete_at(trigger_pc, p, ucode);
                    621:                        goto done;
                    622:                }
                    623:        }
                    624:        *ucode = a0;
                                /* Without software completion the trap is reported as SIGFPE. */
                    625:        if (!(a0 & 1))
                    626:                return sig;
                    627: /*
                    628:  * At this point we are somewhere in the trap shadow of one or more instruc-
                    629:  * tions that have trapped with software completion specified.  We have a mask
                    630:  * of the registers written by trapping instructions.
                    631:  *
                    632:  * Now step backwards through the trap shadow, clearing bits in the
                    633:  * destination write mask until the trigger instruction is found, and
                    634:  * interpret this one instruction in SW. If a SIGFPE is not required, back up
                    635:  * the PC until just after this instruction and restart. This will execute all
                    636:  * trap shadow instructions between the trigger pc and the trap pc twice.
                    637:  *
                    638:  * If a SIGFPE is generated from the OSF1 emulation,  back up one more
                    639:  * instruction to the trigger pc itself. Native binaries don't because it
                    640:  * is non-portable and completely defeats the intended purpose of IEEE
                    641:  * traps -- for example, to count the number of exponent wraps for a later
                    642:  * correction.
                    643:  */
                    644:        trigger_pc = 0;
                                /*
                                 * win_begin starts equal to pc so that the first iteration, which
                                 * looks at pc - 1, always fetches a window from user space.
                                 */
                    645:        win_begin = pc;
                    646:        ++alpha_shadow.scans;
                    647:        t = alpha_shadow.len;
                                /* Walk backwards until every bit of the write mask is cleared. */
                    648:        for (--pc; a1; --pc) {
                    649:                ++alpha_shadow.len;
                    650:                if (pc < win_begin) {
                                                /*
                                                 * Refill so that the window ends at pc, i.e. the
                                                 * instruction at pc lands in tsw[TSWINSIZE - 1].
                                                 * If the whole window cannot be copied, fall back
                                                 * to the 4 bytes at pc in tsw[0]; the next
                                                 * iteration then refills again.
                                                 */
                    651:                        win_begin = pc - TSWINSIZE + 1;
                    652:                        if (copyin(win_begin, tsw, sizeof tsw)) {
                    653:                                /* sigh, try to get just one */
                    654:                                win_begin = pc;
                    655:                                if (copyin(win_begin, tsw, 4))
                    656:                                        return SIGSEGV;
                    657:                        }
                    658:                }
                    659:                inst = tsw[pc - win_begin];
                                        /* One-hot encoding of the opcode for the class mask tests. */
                    660:                op_class = 1UL << inst.generic_format.opcode;
                    661:                if (op_class & FPUREG_CLASS) {
                    662:                        a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
                    663:                        trigger_pc = pc;
                    664:                } else if (op_class & CPUREG_CLASS) {
                    665:                        a1 &= ~(1UL << inst.operate_generic_format.rc);
                    666:                        trigger_pc = pc;
                    667:                } else if (op_class & TRAPSHADOWBOUNDARY) {
                                                /*
                                                 * A boundary opcode ends the scan, except that
                                                 * opcodes also in CHECKFUNCTIONCODE end it only
                                                 * when the displacement field is op_trapb or
                                                 * op_excb.
                                                 */
                    668:                        if (op_class & CHECKFUNCTIONCODE) {
                    669:                                if (inst.mem_format.displacement == op_trapb ||
                    670:                                    inst.mem_format.displacement == op_excb)
                    671:                                        break;  /* code breaks AARM rules */
                    672:                        } else
                    673:                                break; /* code breaks AARM rules */
                    674:                }
                    675:                /* Some shadow-safe op, probably load, store, or FPTI class */
                    676:        }
                                /* t becomes the length of this scan; remember the longest seen. */
                    677:        t = alpha_shadow.len - t;
                    678:        if (t > alpha_shadow.max)
                    679:                alpha_shadow.max = t;
                                /*
                                 * The scan is resolved only if a candidate was found and the mask
                                 * is fully cleared; a scan cut short by a boundary instruction
                                 * leaves a1 nonzero and is reported as SIGFPE with *ucode == a0.
                                 */
                    680:        if (__predict_true(trigger_pc != 0 && a1 == 0)) {
                    681:                ++alpha_shadow.resolved;
                    682:                sig = alpha_fp_complete_at(trigger_pc, p, ucode);
                    683:        } else {
                    684:                ++alpha_shadow.unresolved;
                    685:                return sig;
                    686:        }
                    687: done:
                                /*
                                 * When a signal results, the saved user PC is moved to the
                                 * instruction following trigger_pc.  When sig is 0 the saved PC
                                 * is left untouched.
                                 * NOTE(review): the block comment before the scan describes
                                 * backing up the PC when no SIGFPE is required; the code here
                                 * only rewrites the PC when a signal is returned -- confirm
                                 * which is intended.
                                 */
                    688:        if (sig) {
                    689:                usertrap_pc = trigger_pc + 1;
                    690:                p->p_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
                    691:                return sig;
                    692:        }
                    693:        return 0;
                    694: }

CVSweb