Annotation of sys/arch/m68k/fpsp/res_func.sa, Revision 1.1.1.1
1.1 nbrk 1: * $OpenBSD: res_func.sa,v 1.5 2006/11/30 20:08:22 mk Exp $
2: * $NetBSD: res_func.sa,v 1.3 1994/10/26 07:49:22 cgd Exp $
3:
4: * MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
5: * M68000 Hi-Performance Microprocessor Division
6: * M68040 Software Package
7: *
8: * M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
9: * All rights reserved.
10: *
11: * THE SOFTWARE is provided on an "AS IS" basis and without warranty.
12: * To the maximum extent permitted by applicable law,
13: * MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
14: * INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
15: * PARTICULAR PURPOSE and any warranty against infringement with
16: * regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
17: * and any accompanying written materials.
18: *
19: * To the maximum extent permitted by applicable law,
20: * IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
21: * (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
22: * PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
23: * OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
24: * SOFTWARE. Motorola assumes no responsibility for the maintenance
25: * and support of the SOFTWARE.
26: *
27: * You are hereby granted a copyright license to use, modify, and
28: * distribute the SOFTWARE so long as this entire notice is retained
29: * without alteration in any modified and/or redistributed versions,
30: * and that such modified versions are clearly identified as such.
31: * No licenses are granted by implication, estoppel or otherwise
32: * under any patents or trademarks of Motorola, Inc.
33:
34: *
35: * res_func.sa 3.9 7/29/91
36: *
37: * Normalizes denormalized numbers if necessary and updates the
38: * stack frame. The function is then restored back into the
39: * machine and the 040 completes the operation. This routine
40: * is only used by the unsupported data type/format handler.
41: * (Exception vector 55).
42: *
43: * For packed move out (fmove.p fpm,<ea>) the operation is
44: * completed here; data is packed and moved to user memory.
45: * The stack is restored to the 040 only in the case of a
46: * reportable exception in the conversion.
47: *
48:
49: RES_FUNC IDNT 2,1 Motorola 040 Floating Point Software Package
50:
51: section 8
52:
53: include fpsp.h
54:
55: sp_bnds: dc.w $3f81,$407e
56: dc.w $3f6a,$0000
57: dp_bnds: dc.w $3c01,$43fe
58: dc.w $3bcd,$0000
59:
60: xref mem_write
61: xref bindec
62: xref get_fline
63: xref round
64: xref denorm
65: xref dest_ext
66: xref dest_dbl
67: xref dest_sgl
68: xref unf_sub
69: xref nrm_set
70: xref dnrm_lp
71: xref ovf_res
72: xref reg_dest
73: xref t_ovfl
74: xref t_unfl
75:
76: xdef res_func
77: xdef p_move
78:
79: res_func:
80: clr.b DNRM_FLG(a6)
81: clr.b RES_FLG(a6)
82: clr.b CU_ONLY(a6)
83: tst.b DY_MO_FLG(a6)
84: beq.b monadic
85: dyadic:
86: btst.b #7,DTAG(a6) ;if dop = norm=000, zero=001,
87: * ;inf=010 or nan=011
88: beq.b monadic ;then branch
89: * ;else denorm
90: * HANDLE DESTINATION DENORM HERE
91: * ;set dtag to norm
92: * ;write the tag & fpte15 to the fstack
93: lea.l FPTEMP(a6),a0
94:
95: bclr.b #sign_bit,LOCAL_EX(a0)
96: sne LOCAL_SGN(a0)
97:
98: bsr nrm_set ;normalize number (exp will go negative)
99: bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign
100: bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
101: beq.b dpos
102: bset.b #sign_bit,LOCAL_EX(a0)
103: dpos:
104: bfclr DTAG(a6){0:4} ;set tag to normalized, FPTE15 = 0
105: bset.b #4,DTAG(a6) ;set FPTE15
106: or.b #$0f,DNRM_FLG(a6)
107: monadic:
108: lea.l ETEMP(a6),a0
109: btst.b #direction_bit,CMDREG1B(a6) ;check direction
110: bne.w opclass3 ;it is a mv out
111: *
112: * At this point, only oplcass 0 and 2 possible
113: *
114: btst.b #7,STAG(a6) ;if sop = norm=000, zero=001,
115: * ;inf=010 or nan=011
116: bne.w mon_dnrm ;else denorm
117: tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would
118: bne.w normal ;require normalization of denorm
119:
120: * At this point:
121: * monadic instructions: fabs = $18 fneg = $1a ftst = $3a
122: * fmove = $00 fsmove = $40 fdmove = $44
123: * fsqrt = $05* fssqrt = $41 fdsqrt = $45
124: * (*fsqrt reencoded to $05)
125: *
126: move.w CMDREG1B(a6),d0 ;get command register
127: andi.l #$7f,d0 ;strip to only command word
128: *
129: * At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
130: * fdsqrt are possible.
131: * For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
132: * For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
133: *
134: btst.l #0,d0
135: bne.w normal ;weed out fsqrt instructions
136: *
137: * cu_norm handles fmove in instructions with normalized inputs.
138: * The routine round is used to correctly round the input for the
139: * destination precision and mode.
140: *
141: cu_norm:
142: st CU_ONLY(a6) ;set cu-only inst flag
143: move.w CMDREG1B(a6),d0
144: andi.b #$3b,d0 ;isolate bits to select inst
145: tst.b d0
146: beq.l cu_nmove ;if zero, it is an fmove
147: cmpi.b #$18,d0
148: beq.l cu_nabs ;if $18, it is fabs
149: cmpi.b #$1a,d0
150: beq.l cu_nneg ;if $1a, it is fneg
151: *
152: * Inst is ftst. Check the source operand and set the cc's accordingly.
153: * No write is done, so simply rts.
154: *
155: cu_ntst:
156: move.w LOCAL_EX(a0),d0
157: bclr.l #15,d0
158: sne LOCAL_SGN(a0)
159: beq.b cu_ntpo
160: or.l #neg_mask,USER_FPSR(a6) ;set N
161: cu_ntpo:
162: cmpi.w #$7fff,d0 ;test for inf/nan
163: bne.b cu_ntcz
164: tst.l LOCAL_HI(a0)
165: bne.b cu_ntn
166: tst.l LOCAL_LO(a0)
167: bne.b cu_ntn
168: or.l #inf_mask,USER_FPSR(a6)
169: rts
170: cu_ntn:
171: or.l #nan_mask,USER_FPSR(a6)
172: move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
173: * ;snan handler
174:
175: rts
176: cu_ntcz:
177: tst.l LOCAL_HI(a0)
178: bne.l cu_ntsx
179: tst.l LOCAL_LO(a0)
180: bne.l cu_ntsx
181: or.l #z_mask,USER_FPSR(a6)
182: cu_ntsx:
183: rts
184: *
185: * Inst is fabs. Execute the absolute value function on the input.
186: * Branch to the fmove code. If the operand is NaN, do nothing.
187: *
188: cu_nabs:
189: move.b STAG(a6),d0
190: btst.l #5,d0 ;test for NaN or zero
191: bne wr_etemp ;if either, simply write it
192: bclr.b #7,LOCAL_EX(a0) ;do abs
193: bra.b cu_nmove ;fmove code will finish
194: *
195: * Inst is fneg. Execute the negate value function on the input.
196: * Fall though to the fmove code. If the operand is NaN, do nothing.
197: *
198: cu_nneg:
199: move.b STAG(a6),d0
200: btst.l #5,d0 ;test for NaN or zero
201: bne wr_etemp ;if either, simply write it
202: bchg.b #7,LOCAL_EX(a0) ;do neg
203: *
204: * Inst is fmove. This code also handles all result writes.
205: * If bit 2 is set, round is forced to double. If it is clear,
206: * and bit 6 is set, round is forced to single. If both are clear,
207: * the round precision is found in the fpcr. If the rounding precision
208: * is double or single, round the result before the write.
209: *
210: cu_nmove:
211: move.b STAG(a6),d0
212: andi.b #$e0,d0 ;isolate stag bits
213: bne wr_etemp ;if not norm, simply write it
214: btst.b #2,CMDREG1B+1(a6) ;check for rd
215: bne cu_nmrd
216: btst.b #6,CMDREG1B+1(a6) ;check for rs
217: bne cu_nmrs
218: *
219: * The move or operation is not with forced precision. Test for
220: * nan or inf as the input; if so, simply write it to FPn. Use the
221: * FPCR_MODE byte to get rounding on norms and zeros.
222: *
223: cu_nmnr:
224: bfextu FPCR_MODE(a6){0:2},d0
225: tst.b d0 ;check for extended
226: beq cu_wrexn ;if so, just write result
227: cmpi.b #1,d0 ;check for single
228: beq cu_nmrs ;fall through to double
229: *
230: * The move is fdmove or round precision is double.
231: *
232: cu_nmrd:
233: move.l #2,d0 ;set up the size for denorm
234: move.w LOCAL_EX(a0),d1 ;compare exponent to double threshold
235: and.w #$7fff,d1
236: cmp.w #$3c01,d1
237: bls cu_nunfl
238: bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
239: or.l #$00020000,d1 ;or in rprec (double)
240: clr.l d0 ;clear g,r,s for round
241: bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
242: sne LOCAL_SGN(a0)
243: bsr.l round
244: bfclr LOCAL_SGN(a0){0:8}
245: beq.b cu_nmrdc
246: bset.b #sign_bit,LOCAL_EX(a0)
247: cu_nmrdc:
248: move.w LOCAL_EX(a0),d1 ;check for overflow
249: and.w #$7fff,d1
250: cmp.w #$43ff,d1
251: bge cu_novfl ;take care of overflow case
252: bra.w cu_wrexn
253: *
254: * The move is fsmove or round precision is single.
255: *
256: cu_nmrs:
257: move.l #1,d0
258: move.w LOCAL_EX(a0),d1
259: and.w #$7fff,d1
260: cmp.w #$3f81,d1
261: bls cu_nunfl
262: bfextu FPCR_MODE(a6){2:2},d1
263: or.l #$00010000,d1
264: clr.l d0
265: bclr.b #sign_bit,LOCAL_EX(a0)
266: sne LOCAL_SGN(a0)
267: bsr.l round
268: bfclr LOCAL_SGN(a0){0:8}
269: beq.b cu_nmrsc
270: bset.b #sign_bit,LOCAL_EX(a0)
271: cu_nmrsc:
272: move.w LOCAL_EX(a0),d1
273: and.w #$7FFF,d1
274: cmp.w #$407f,d1
275: blt cu_wrexn
276: *
277: * The operand is above precision boundaries. Use t_ovfl to
278: * generate the correct value.
279: *
280: cu_novfl:
281: bsr t_ovfl
282: bra cu_wrexn
283: *
284: * The operand is below precision boundaries. Use denorm to
285: * generate the correct value.
286: *
287: cu_nunfl:
288: bclr.b #sign_bit,LOCAL_EX(a0)
289: sne LOCAL_SGN(a0)
290: bsr denorm
291: bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
292: beq.b cu_nucont
293: bset.b #sign_bit,LOCAL_EX(a0)
294: cu_nucont:
295: bfextu FPCR_MODE(a6){2:2},d1
296: btst.b #2,CMDREG1B+1(a6) ;check for rd
297: bne inst_d
298: btst.b #6,CMDREG1B+1(a6) ;check for rs
299: bne inst_s
300: swap d1
301: move.b FPCR_MODE(a6),d1
302: lsr.b #6,d1
303: swap d1
304: bra inst_sd
305: inst_d:
306: or.l #$00020000,d1
307: bra inst_sd
308: inst_s:
309: or.l #$00010000,d1
310: inst_sd:
311: bclr.b #sign_bit,LOCAL_EX(a0)
312: sne LOCAL_SGN(a0)
313: bsr.l round
314: bfclr LOCAL_SGN(a0){0:8}
315: beq.b cu_nuflp
316: bset.b #sign_bit,LOCAL_EX(a0)
317: cu_nuflp:
318: btst.b #inex2_bit,FPSR_EXCEPT(a6)
319: beq.b cu_nuninx
320: or.l #aunfl_mask,USER_FPSR(a6) ;if the round was inex, set AUNFL
321: cu_nuninx:
322: tst.l LOCAL_HI(a0) ;test for zero
323: bne.b cu_nunzro
324: tst.l LOCAL_LO(a0)
325: bne.b cu_nunzro
326: *
327: * The mantissa is zero from the denorm loop. Check sign and rmode
328: * to see if rounding should have occurred which would leave the lsb.
329: *
330: move.l USER_FPCR(a6),d0
331: andi.l #$30,d0 ;isolate rmode
332: cmpi.l #$20,d0
333: blt.b cu_nzro
334: bne.b cu_nrp
335: cu_nrm:
336: tst.w LOCAL_EX(a0) ;if positive, set lsb
337: bge.b cu_nzro
338: btst.b #7,FPCR_MODE(a6) ;check for double
339: beq.b cu_nincs
340: bra.b cu_nincd
341: cu_nrp:
342: tst.w LOCAL_EX(a0) ;if positive, set lsb
343: blt.b cu_nzro
344: btst.b #7,FPCR_MODE(a6) ;check for double
345: beq.b cu_nincs
346: cu_nincd:
347: or.l #$800,LOCAL_LO(a0) ;inc for double
348: bra cu_nunzro
349: cu_nincs:
350: or.l #$100,LOCAL_HI(a0) ;inc for single
351: bra cu_nunzro
352: cu_nzro:
353: or.l #z_mask,USER_FPSR(a6)
354: move.b STAG(a6),d0
355: andi.b #$e0,d0
356: cmpi.b #$40,d0 ;check if input was tagged zero
357: beq.b cu_numv
358: cu_nunzro:
359: or.l #unfl_mask,USER_FPSR(a6) ;set unfl
360: cu_numv:
361: move.l (a0),ETEMP(a6)
362: move.l 4(a0),ETEMP_HI(a6)
363: move.l 8(a0),ETEMP_LO(a6)
364: *
365: * Write the result to memory, setting the fpsr cc bits. NaN and Inf
366: * bypass cu_wrexn.
367: *
368: cu_wrexn:
369: tst.w LOCAL_EX(a0) ;test for zero
370: beq.b cu_wrzero
371: cmp.w #$8000,LOCAL_EX(a0) ;test for zero
372: bne.b cu_wreon
373: cu_wrzero:
374: or.l #z_mask,USER_FPSR(a6) ;set Z bit
375: cu_wreon:
376: tst.w LOCAL_EX(a0)
377: bpl wr_etemp
378: or.l #neg_mask,USER_FPSR(a6)
379: bra wr_etemp
380:
381: *
382: * HANDLE SOURCE DENORM HERE
383: *
384: * ;clear denorm stag to norm
385: * ;write the new tag & ete15 to the fstack
386: mon_dnrm:
387: *
388: * At this point, check for the cases in which normalizing the
389: * denorm produces incorrect results.
390: *
391: tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would
392: bne.b nrm_src ;require normalization of denorm
393:
394: * At this point:
395: * monadic instructions: fabs = $18 fneg = $1a ftst = $3a
396: * fmove = $00 fsmove = $40 fdmove = $44
397: * fsqrt = $05* fssqrt = $41 fdsqrt = $45
398: * (*fsqrt reencoded to $05)
399: *
400: move.w CMDREG1B(a6),d0 ;get command register
401: andi.l #$7f,d0 ;strip to only command word
402: *
403: * At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
404: * fdsqrt are possible.
405: * For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
406: * For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
407: *
408: btst.l #0,d0
409: bne.b nrm_src ;weed out fsqrt instructions
410: st CU_ONLY(a6) ;set cu-only inst flag
411: bra cu_dnrm ;fmove, fabs, fneg, ftst
412: * ;cases go to cu_dnrm
413: nrm_src:
414: bclr.b #sign_bit,LOCAL_EX(a0)
415: sne LOCAL_SGN(a0)
416: bsr nrm_set ;normalize number (exponent will go
417: * ; negative)
418: bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign
419:
420: bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
421: beq.b spos
422: bset.b #sign_bit,LOCAL_EX(a0)
423: spos:
424: bfclr STAG(a6){0:4} ;set tag to normalized, FPTE15 = 0
425: bset.b #4,STAG(a6) ;set ETE15
426: or.b #$f0,DNRM_FLG(a6)
427: normal:
428: tst.b DNRM_FLG(a6) ;check if any of the ops were denorms
429: bne ck_wrap ;if so, check if it is a potential
430: * ;wrap-around case
431: fix_stk:
432: move.b #$fe,CU_SAVEPC(a6)
433: bclr.b #E1,E_BYTE(a6)
434:
435: clr.w NMNEXC(a6)
436:
437: st.b RES_FLG(a6) ;indicate that a restore is needed
438: rts
439:
440: *
441: * cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
442: * ftst) completely in software without an frestore to the 040.
443: *
444: cu_dnrm:
445: st.b CU_ONLY(a6)
446: move.w CMDREG1B(a6),d0
447: andi.b #$3b,d0 ;isolate bits to select inst
448: tst.b d0
449: beq.l cu_dmove ;if zero, it is an fmove
450: cmpi.b #$18,d0
451: beq.l cu_dabs ;if $18, it is fabs
452: cmpi.b #$1a,d0
453: beq.l cu_dneg ;if $1a, it is fneg
454: *
455: * Inst is ftst. Check the source operand and set the cc's accordingly.
456: * No write is done, so simply rts.
457: *
458: cu_dtst:
459: move.w LOCAL_EX(a0),d0
460: bclr.l #15,d0
461: sne LOCAL_SGN(a0)
462: beq.b cu_dtpo
463: or.l #neg_mask,USER_FPSR(a6) ;set N
464: cu_dtpo:
465: cmpi.w #$7fff,d0 ;test for inf/nan
466: bne.b cu_dtcz
467: tst.l LOCAL_HI(a0)
468: bne.b cu_dtn
469: tst.l LOCAL_LO(a0)
470: bne.b cu_dtn
471: or.l #inf_mask,USER_FPSR(a6)
472: rts
473: cu_dtn:
474: or.l #nan_mask,USER_FPSR(a6)
475: move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
476: * ;snan handler
477: rts
478: cu_dtcz:
479: tst.l LOCAL_HI(a0)
480: bne.l cu_dtsx
481: tst.l LOCAL_LO(a0)
482: bne.l cu_dtsx
483: or.l #z_mask,USER_FPSR(a6)
484: cu_dtsx:
485: rts
486: *
487: * Inst is fabs. Execute the absolute value function on the input.
488: * Branch to the fmove code.
489: *
490: cu_dabs:
491: bclr.b #7,LOCAL_EX(a0) ;do abs
492: bra.b cu_dmove ;fmove code will finish
493: *
494: * Inst is fneg. Execute the negate value function on the input.
495: * Fall though to the fmove code.
496: *
497: cu_dneg:
498: bchg.b #7,LOCAL_EX(a0) ;do neg
499: *
500: * Inst is fmove. This code also handles all result writes.
501: * If bit 2 is set, round is forced to double. If it is clear,
502: * and bit 6 is set, round is forced to single. If both are clear,
503: * the round precision is found in the fpcr. If the rounding precision
504: * is double or single, the result is zero, and the mode is checked
505: * to determine if the lsb of the result should be set.
506: *
507: cu_dmove:
508: btst.b #2,CMDREG1B+1(a6) ;check for rd
509: bne cu_dmrd
510: btst.b #6,CMDREG1B+1(a6) ;check for rs
511: bne cu_dmrs
512: *
513: * The move or operation is not with forced precision. Use the
514: * FPCR_MODE byte to get rounding.
515: *
516: cu_dmnr:
517: bfextu FPCR_MODE(a6){0:2},d0
518: tst.b d0 ;check for extended
519: beq cu_wrexd ;if so, just write result
520: cmpi.b #1,d0 ;check for single
521: beq cu_dmrs ;fall through to double
522: *
523: * The move is fdmove or round precision is double. Result is zero.
524: * Check rmode for rp or rm and set lsb accordingly.
525: *
526: cu_dmrd:
527: bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
528: tst.w LOCAL_EX(a0) ;check sign
529: blt.b cu_dmdn
530: cmpi.b #3,d1 ;check for rp
531: bne cu_dpd ;load double pos zero
532: bra cu_dpdr ;load double pos zero w/lsb
533: cu_dmdn:
534: cmpi.b #2,d1 ;check for rm
535: bne cu_dnd ;load double neg zero
536: bra cu_dndr ;load double neg zero w/lsb
537: *
538: * The move is fsmove or round precision is single. Result is zero.
539: * Check for rp or rm and set lsb accordingly.
540: *
541: cu_dmrs:
542: bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
543: tst.w LOCAL_EX(a0) ;check sign
544: blt.b cu_dmsn
545: cmpi.b #3,d1 ;check for rp
546: bne cu_spd ;load single pos zero
547: bra cu_spdr ;load single pos zero w/lsb
548: cu_dmsn:
549: cmpi.b #2,d1 ;check for rm
550: bne cu_snd ;load single neg zero
551: bra cu_sndr ;load single neg zero w/lsb
552: *
553: * The precision is extended, so the result in etemp is correct.
554: * Simply set unfl (not inex2 or aunfl) and write the result to
555: * the correct fp register.
556: cu_wrexd:
557: or.l #unfl_mask,USER_FPSR(a6)
558: tst.w LOCAL_EX(a0)
559: beq wr_etemp
560: or.l #neg_mask,USER_FPSR(a6)
561: bra wr_etemp
562: *
563: * These routines write +/- zero in double format. The routines
564: * cu_dpdr and cu_dndr set the double lsb.
565: *
566: cu_dpd:
567: move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero
568: clr.l LOCAL_HI(a0)
569: clr.l LOCAL_LO(a0)
570: or.l #z_mask,USER_FPSR(a6)
571: or.l #unfinx_mask,USER_FPSR(a6)
572: bra wr_etemp
573: cu_dpdr:
574: move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero
575: clr.l LOCAL_HI(a0)
576: move.l #$800,LOCAL_LO(a0) ;with lsb set
577: or.l #unfinx_mask,USER_FPSR(a6)
578: bra wr_etemp
579: cu_dnd:
580: move.l #$bc010000,LOCAL_EX(a0) ;force pos double zero
581: clr.l LOCAL_HI(a0)
582: clr.l LOCAL_LO(a0)
583: or.l #z_mask,USER_FPSR(a6)
584: or.l #neg_mask,USER_FPSR(a6)
585: or.l #unfinx_mask,USER_FPSR(a6)
586: bra wr_etemp
587: cu_dndr:
588: move.l #$bc010000,LOCAL_EX(a0) ;force pos double zero
589: clr.l LOCAL_HI(a0)
590: move.l #$800,LOCAL_LO(a0) ;with lsb set
591: or.l #neg_mask,USER_FPSR(a6)
592: or.l #unfinx_mask,USER_FPSR(a6)
593: bra wr_etemp
594: *
595: * These routines write +/- zero in single format. The routines
596: * cu_dpdr and cu_dndr set the single lsb.
597: *
598: cu_spd:
599: move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero
600: clr.l LOCAL_HI(a0)
601: clr.l LOCAL_LO(a0)
602: or.l #z_mask,USER_FPSR(a6)
603: or.l #unfinx_mask,USER_FPSR(a6)
604: bra wr_etemp
605: cu_spdr:
606: move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero
607: move.l #$100,LOCAL_HI(a0) ;with lsb set
608: clr.l LOCAL_LO(a0)
609: or.l #unfinx_mask,USER_FPSR(a6)
610: bra wr_etemp
611: cu_snd:
612: move.l #$bf810000,LOCAL_EX(a0) ;force pos single zero
613: clr.l LOCAL_HI(a0)
614: clr.l LOCAL_LO(a0)
615: or.l #z_mask,USER_FPSR(a6)
616: or.l #neg_mask,USER_FPSR(a6)
617: or.l #unfinx_mask,USER_FPSR(a6)
618: bra wr_etemp
619: cu_sndr:
620: move.l #$bf810000,LOCAL_EX(a0) ;force pos single zero
621: move.l #$100,LOCAL_HI(a0) ;with lsb set
622: clr.l LOCAL_LO(a0)
623: or.l #neg_mask,USER_FPSR(a6)
624: or.l #unfinx_mask,USER_FPSR(a6)
625: bra wr_etemp
626:
627: *
628: * This code checks for 16-bit overflow conditions on dyadic
629: * operations which are not restorable into the floating-point
630: * unit and must be completed in software. Basically, this
631: * condition exists with a very large norm and a denorm. One
632: * of the operands must be denormalized to enter this code.
633: *
634: * Flags used:
635: * DY_MO_FLG contains 0 for monadic op, $ff for dyadic
636: * DNRM_FLG contains $00 for neither op denormalized
637: * $0f for the destination op denormalized
638: * $f0 for the source op denormalized
639: * $ff for both ops denormalzed
640: *
641: * The wrap-around condition occurs for add, sub, div, and cmp
642: * when
643: *
644: * abs(dest_exp - src_exp) >= $8000
645: *
646: * and for mul when
647: *
648: * (dest_exp + src_exp) < $0
649: *
650: * we must process the operation here if this case is true.
651: *
652: * The rts following the frcfpn routine is the exit from res_func
653: * for this condition. The restore flag (RES_FLG) is left clear.
654: * No frestore is done unless an exception is to be reported.
655: *
656: * For fadd:
657: * if(sign_of(dest) != sign_of(src))
658: * replace exponent of src with $3fff (keep sign)
659: * use fpu to perform dest+new_src (user's rmode and X)
660: * clr sticky
661: * else
662: * set sticky
663: * call round with user's precision and mode
664: * move result to fpn and wbtemp
665: *
666: * For fsub:
667: * if(sign_of(dest) == sign_of(src))
668: * replace exponent of src with $3fff (keep sign)
669: * use fpu to perform dest+new_src (user's rmode and X)
670: * clr sticky
671: * else
672: * set sticky
673: * call round with user's precision and mode
674: * move result to fpn and wbtemp
675: *
676: * For fdiv/fsgldiv:
677: * if(both operands are denorm)
678: * restore_to_fpu;
679: * if(dest is norm)
680: * force_ovf;
681: * else(dest is denorm)
682: * force_unf:
683: *
684: * For fcmp:
685: * if(dest is norm)
686: * N = sign_of(dest);
687: * else(dest is denorm)
688: * N = sign_of(src);
689: *
690: * For fmul:
691: * if(both operands are denorm)
692: * force_unf;
693: * if((dest_exp + src_exp) < 0)
694: * force_unf:
695: * else
696: * restore_to_fpu;
697: *
698: * local equates:
699: addcode equ $22
700: subcode equ $28
701: mulcode equ $23
702: divcode equ $20
703: cmpcode equ $38
704: ck_wrap:
705: tst.b DY_MO_FLG(a6) ;check for fsqrt
706: beq fix_stk ;if zero, it is fsqrt
707: move.w CMDREG1B(a6),d0
708: andi.w #$3b,d0 ;strip to command bits
709: cmpi.w #addcode,d0
710: beq wrap_add
711: cmpi.w #subcode,d0
712: beq wrap_sub
713: cmpi.w #mulcode,d0
714: beq wrap_mul
715: cmpi.w #cmpcode,d0
716: beq wrap_cmp
717: *
718: * Inst is fdiv.
719: *
720: wrap_div:
721: cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
722: beq fix_stk ;restore to fpu
723: *
724: * One of the ops is denormalized. Test for wrap condition
725: * and force the result.
726: *
727: cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
728: bne.b div_srcd
729: div_destd:
730: bsr.l ckinf_ns
731: bne fix_stk
732: bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
733: bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
734: sub.l d1,d0 ;subtract dest from src
735: cmp.l #$7fff,d0
736: blt fix_stk ;if less, not wrap case
737: clr.b WBTEMP_SGN(a6)
738: move.w ETEMP_EX(a6),d0 ;find the sign of the result
739: move.w FPTEMP_EX(a6),d1
740: eor.w d1,d0
741: andi.w #$8000,d0
742: beq force_unf
743: st.b WBTEMP_SGN(a6)
744: bra force_unf
745:
746: ckinf_ns:
747: move.b STAG(a6),d0 ;check source tag for inf or nan
748: bra ck_in_com
749: ckinf_nd:
750: move.b DTAG(a6),d0 ;check destination tag for inf or nan
751: ck_in_com:
752: andi.b #$60,d0 ;isolate tag bits
753: cmp.b #$40,d0 ;is it inf?
754: beq nan_or_inf ;not wrap case
755: cmp.b #$60,d0 ;is it nan?
756: beq nan_or_inf ;yes, not wrap case?
757: cmp.b #$20,d0 ;is it a zero?
758: beq nan_or_inf ;yes
759: clr.l d0
760: rts ;then it is either a zero of norm,
761: * ;check wrap case
762: nan_or_inf:
763: moveq.l #-1,d0
764: rts
765:
766:
767:
768: div_srcd:
769: bsr.l ckinf_nd
770: bne fix_stk
771: bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
772: bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
773: sub.l d1,d0 ;subtract src from dest
774: cmp.l #$8000,d0
775: blt fix_stk ;if less, not wrap case
776: clr.b WBTEMP_SGN(a6)
777: move.w ETEMP_EX(a6),d0 ;find the sign of the result
778: move.w FPTEMP_EX(a6),d1
779: eor.w d1,d0
780: andi.w #$8000,d0
781: beq.b force_ovf
782: st.b WBTEMP_SGN(a6)
783: *
784: * This code handles the case of the instruction resulting in
785: * an overflow condition.
786: *
787: force_ovf:
788: bclr.b #E1,E_BYTE(a6)
789: or.l #ovfl_inx_mask,USER_FPSR(a6)
790: clr.w NMNEXC(a6)
791: lea.l WBTEMP(a6),a0 ;point a0 to memory location
792: move.w CMDREG1B(a6),d0
793: btst.l #6,d0 ;test for forced precision
794: beq.b frcovf_fpcr
795: btst.l #2,d0 ;check for double
796: bne.b frcovf_dbl
797: move.l #$1,d0 ;inst is forced single
798: bra.b frcovf_rnd
799: frcovf_dbl:
800: move.l #$2,d0 ;inst is forced double
801: bra.b frcovf_rnd
802: frcovf_fpcr:
803: bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
804: frcovf_rnd:
805:
806: * The 881/882 does not set inex2 for the following case, so the
807: * line is commented out to be compatible with 881/882
808: * tst.b d0
809: * beq.b frcovf_x
810: * or.l #inex2_mask,USER_FPSR(a6) ;if prec is s or d, set inex2
811:
812: *frcovf_x:
813: bsr.l ovf_res ;get correct result based on
814: * ;round precision/mode. This
815: * ;sets FPSR_CC correctly
816: * ;returns in external format
817: bfclr WBTEMP_SGN(a6){0:8}
818: beq frcfpn
819: bset.b #sign_bit,WBTEMP_EX(a6)
820: bra frcfpn
821: *
822: * Inst is fadd.
823: *
824: wrap_add:
825: cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
826: beq fix_stk ;restore to fpu
827: *
828: * One of the ops is denormalized. Test for wrap condition
829: * and complete the instruction.
830: *
831: cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
832: bne.b add_srcd
833: add_destd:
834: bsr.l ckinf_ns
835: bne fix_stk
836: bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
837: bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
838: sub.l d1,d0 ;subtract dest from src
839: cmp.l #$8000,d0
840: blt fix_stk ;if less, not wrap case
841: bra add_wrap
842: add_srcd:
843: bsr.l ckinf_nd
844: bne fix_stk
845: bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
846: bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
847: sub.l d1,d0 ;subtract src from dest
848: cmp.l #$8000,d0
849: blt fix_stk ;if less, not wrap case
850: *
851: * Check the signs of the operands. If they are unlike, the fpu
852: * can be used to add the norm and 1.0 with the sign of the
853: * denorm and it will correctly generate the result in extended
854: * precision. We can then call round with no sticky and the result
855: * will be correct for the user's rounding mode and precision. If
856: * the signs are the same, we call round with the sticky bit set
857: * and the result will be correctfor the user's rounding mode and
858: * precision.
859: *
860: add_wrap:
861: move.w ETEMP_EX(a6),d0
862: move.w FPTEMP_EX(a6),d1
863: eor.w d1,d0
864: andi.w #$8000,d0
865: beq add_same
866: *
867: * The signs are unlike.
868: *
869: cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
870: bne.b add_u_srcd
871: move.w FPTEMP_EX(a6),d0
872: andi.w #$8000,d0
873: or.w #$3fff,d0 ;force the exponent to +/- 1
874: move.w d0,FPTEMP_EX(a6) ;in the denorm
875: move.l USER_FPCR(a6),d0
876: andi.l #$30,d0
877: fmove.l d0,fpcr ;set up users rmode and X
878: fmove.x ETEMP(a6),fp0
879: fadd.x FPTEMP(a6),fp0
880: lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
881: fmove.l fpsr,d1
882: or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
883: fmove.x fp0,WBTEMP(a6) ;write result to memory
884: lsr.l #4,d0 ;put rmode in lower 2 bits
885: move.l USER_FPCR(a6),d1
886: andi.l #$c0,d1
887: lsr.l #6,d1 ;put precision in upper word
888: swap d1
889: or.l d0,d1 ;set up for round call
890: clr.l d0 ;force sticky to zero
891: bclr.b #sign_bit,WBTEMP_EX(a6)
892: sne WBTEMP_SGN(a6)
893: bsr.l round ;round result to users rmode & prec
894: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
895: beq frcfpnr
896: bset.b #sign_bit,WBTEMP_EX(a6)
897: bra frcfpnr
898: add_u_srcd:
899: move.w ETEMP_EX(a6),d0
900: andi.w #$8000,d0
901: or.w #$3fff,d0 ;force the exponent to +/- 1
902: move.w d0,ETEMP_EX(a6) ;in the denorm
903: move.l USER_FPCR(a6),d0
904: andi.l #$30,d0
905: fmove.l d0,fpcr ;set up users rmode and X
906: fmove.x ETEMP(a6),fp0
907: fadd.x FPTEMP(a6),fp0
908: fmove.l fpsr,d1
909: or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
910: lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
911: fmove.x fp0,WBTEMP(a6) ;write result to memory
912: lsr.l #4,d0 ;put rmode in lower 2 bits
913: move.l USER_FPCR(a6),d1
914: andi.l #$c0,d1
915: lsr.l #6,d1 ;put precision in upper word
916: swap d1
917: or.l d0,d1 ;set up for round call
918: clr.l d0 ;force sticky to zero
919: bclr.b #sign_bit,WBTEMP_EX(a6)
920: sne WBTEMP_SGN(a6) ;use internal format for round
921: bsr.l round ;round result to users rmode & prec
922: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
923: beq frcfpnr
924: bset.b #sign_bit,WBTEMP_EX(a6)
925: bra frcfpnr
926: *
927: * Signs are alike:
928: *
929: add_same:
930: cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
931: bne.b add_s_srcd
932: add_s_destd:
933: lea.l ETEMP(a6),a0
934: move.l USER_FPCR(a6),d0
935: andi.l #$30,d0
936: lsr.l #4,d0 ;put rmode in lower 2 bits
937: move.l USER_FPCR(a6),d1
938: andi.l #$c0,d1
939: lsr.l #6,d1 ;put precision in upper word
940: swap d1
941: or.l d0,d1 ;set up for round call
942: move.l #$20000000,d0 ;set sticky for round
943: bclr.b #sign_bit,ETEMP_EX(a6)
944: sne ETEMP_SGN(a6)
945: bsr.l round ;round result to users rmode & prec
946: bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format
947: beq.b add_s_dclr
948: bset.b #sign_bit,ETEMP_EX(a6)
949: add_s_dclr:
950: lea.l WBTEMP(a6),a0
951: move.l ETEMP(a6),(a0) ;write result to wbtemp
952: move.l ETEMP_HI(a6),4(a0)
953: move.l ETEMP_LO(a6),8(a0)
954: tst.w ETEMP_EX(a6)
955: bgt add_ckovf
956: or.l #neg_mask,USER_FPSR(a6)
957: bra add_ckovf
958: add_s_srcd:
959: lea.l FPTEMP(a6),a0
960: move.l USER_FPCR(a6),d0
961: andi.l #$30,d0
962: lsr.l #4,d0 ;put rmode in lower 2 bits
963: move.l USER_FPCR(a6),d1
964: andi.l #$c0,d1
965: lsr.l #6,d1 ;put precision in upper word
966: swap d1
967: or.l d0,d1 ;set up for round call
968: move.l #$20000000,d0 ;set sticky for round
969: bclr.b #sign_bit,FPTEMP_EX(a6)
970: sne FPTEMP_SGN(a6)
971: bsr.l round ;round result to users rmode & prec
972: bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
973: beq.b add_s_sclr
974: bset.b #sign_bit,FPTEMP_EX(a6)
975: add_s_sclr:
976: lea.l WBTEMP(a6),a0
977: move.l FPTEMP(a6),(a0) ;write result to wbtemp
978: move.l FPTEMP_HI(a6),4(a0)
979: move.l FPTEMP_LO(a6),8(a0)
980: tst.w FPTEMP_EX(a6)
981: bgt add_ckovf
982: or.l #neg_mask,USER_FPSR(a6)
983: add_ckovf:
984: move.w WBTEMP_EX(a6),d0
985: andi.w #$7fff,d0
986: cmpi.w #$7fff,d0
987: bne frcfpnr
988: *
989: * The result has overflowed to $7fff exponent. Set I, ovfl,
990: * and aovfl, and clr the mantissa (incorrectly set by the
991: * round routine.)
992: *
993: or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6)
994: clr.l 4(a0)
995: bra frcfpnr
996: *
997: * Inst is fsub.
998: *
999: wrap_sub:
1000: cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
1001: beq fix_stk ;restore to fpu
1002: *
1003: * One of the ops is denormalized. Test for wrap condition
1004: * and complete the instruction.
1005: *
1006: cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
1007: bne.b sub_srcd
1008: sub_destd:
1009: bsr.l ckinf_ns
1010: bne fix_stk
1011: bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
1012: bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
1013: sub.l d1,d0 ;subtract src from dest
1014: cmp.l #$8000,d0
1015: blt fix_stk ;if less, not wrap case
1016: bra sub_wrap
1017: sub_srcd:
1018: bsr.l ckinf_nd
1019: bne fix_stk
1020: bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
1021: bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
1022: sub.l d1,d0 ;subtract dest from src
1023: cmp.l #$8000,d0
1024: blt fix_stk ;if less, not wrap case
1025: *
1026: * Check the signs of the operands. If they are alike, the fpu
1027: * can be used to subtract from the norm 1.0 with the sign of the
1028: * denorm and it will correctly generate the result in extended
1029: * precision. We can then call round with no sticky and the result
1030: * will be correct for the user's rounding mode and precision. If
1031: * the signs are unlike, we call round with the sticky bit set
1032: * and the result will be correctfor the user's rounding mode and
1033: * precision.
1034: *
1035: sub_wrap:
1036: move.w ETEMP_EX(a6),d0
1037: move.w FPTEMP_EX(a6),d1
1038: eor.w d1,d0
1039: andi.w #$8000,d0
1040: bne sub_diff
1041: *
1042: * The signs are alike.
1043: *
1044: cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
1045: bne.b sub_u_srcd
1046: move.w FPTEMP_EX(a6),d0
1047: andi.w #$8000,d0
1048: or.w #$3fff,d0 ;force the exponent to +/- 1
1049: move.w d0,FPTEMP_EX(a6) ;in the denorm
1050: move.l USER_FPCR(a6),d0
1051: andi.l #$30,d0
1052: fmove.l d0,fpcr ;set up users rmode and X
1053: fmove.x FPTEMP(a6),fp0
1054: fsub.x ETEMP(a6),fp0
1055: fmove.l fpsr,d1
1056: or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
1057: lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
1058: fmove.x fp0,WBTEMP(a6) ;write result to memory
1059: lsr.l #4,d0 ;put rmode in lower 2 bits
1060: move.l USER_FPCR(a6),d1
1061: andi.l #$c0,d1
1062: lsr.l #6,d1 ;put precision in upper word
1063: swap d1
1064: or.l d0,d1 ;set up for round call
1065: clr.l d0 ;force sticky to zero
1066: bclr.b #sign_bit,WBTEMP_EX(a6)
1067: sne WBTEMP_SGN(a6)
1068: bsr.l round ;round result to users rmode & prec
1069: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1070: beq frcfpnr
1071: bset.b #sign_bit,WBTEMP_EX(a6)
1072: bra frcfpnr
1073: sub_u_srcd:
1074: move.w ETEMP_EX(a6),d0
1075: andi.w #$8000,d0
1076: or.w #$3fff,d0 ;force the exponent to +/- 1
1077: move.w d0,ETEMP_EX(a6) ;in the denorm
1078: move.l USER_FPCR(a6),d0
1079: andi.l #$30,d0
1080: fmove.l d0,fpcr ;set up users rmode and X
1081: fmove.x FPTEMP(a6),fp0
1082: fsub.x ETEMP(a6),fp0
1083: fmove.l fpsr,d1
1084: or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
1085: lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
1086: fmove.x fp0,WBTEMP(a6) ;write result to memory
1087: lsr.l #4,d0 ;put rmode in lower 2 bits
1088: move.l USER_FPCR(a6),d1
1089: andi.l #$c0,d1
1090: lsr.l #6,d1 ;put precision in upper word
1091: swap d1
1092: or.l d0,d1 ;set up for round call
1093: clr.l d0 ;force sticky to zero
1094: bclr.b #sign_bit,WBTEMP_EX(a6)
1095: sne WBTEMP_SGN(a6)
1096: bsr.l round ;round result to users rmode & prec
1097: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1098: beq frcfpnr
1099: bset.b #sign_bit,WBTEMP_EX(a6)
1100: bra frcfpnr
1101: *
1102: * Signs are unlike:
1103: *
1104: sub_diff:
1105: cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
1106: bne.b sub_s_srcd
1107: sub_s_destd:
1108: lea.l ETEMP(a6),a0
1109: move.l USER_FPCR(a6),d0
1110: andi.l #$30,d0
1111: lsr.l #4,d0 ;put rmode in lower 2 bits
1112: move.l USER_FPCR(a6),d1
1113: andi.l #$c0,d1
1114: lsr.l #6,d1 ;put precision in upper word
1115: swap d1
1116: or.l d0,d1 ;set up for round call
1117: move.l #$20000000,d0 ;set sticky for round
1118: *
1119: * Since the dest is the denorm, the sign is the opposite of the
1120: * norm sign.
1121: *
1122: eori.w #$8000,ETEMP_EX(a6) ;flip sign on result
1123: tst.w ETEMP_EX(a6)
1124: bgt.b sub_s_dwr
1125: or.l #neg_mask,USER_FPSR(a6)
1126: sub_s_dwr:
1127: bclr.b #sign_bit,ETEMP_EX(a6)
1128: sne ETEMP_SGN(a6)
1129: bsr.l round ;round result to users rmode & prec
1130: bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1131: beq.b sub_s_dclr
1132: bset.b #sign_bit,ETEMP_EX(a6)
1133: sub_s_dclr:
1134: lea.l WBTEMP(a6),a0
1135: move.l ETEMP(a6),(a0) ;write result to wbtemp
1136: move.l ETEMP_HI(a6),4(a0)
1137: move.l ETEMP_LO(a6),8(a0)
1138: bra sub_ckovf
1139: sub_s_srcd:
1140: lea.l FPTEMP(a6),a0
1141: move.l USER_FPCR(a6),d0
1142: andi.l #$30,d0
1143: lsr.l #4,d0 ;put rmode in lower 2 bits
1144: move.l USER_FPCR(a6),d1
1145: andi.l #$c0,d1
1146: lsr.l #6,d1 ;put precision in upper word
1147: swap d1
1148: or.l d0,d1 ;set up for round call
1149: move.l #$20000000,d0 ;set sticky for round
1150: bclr.b #sign_bit,FPTEMP_EX(a6)
1151: sne FPTEMP_SGN(a6)
1152: bsr.l round ;round result to users rmode & prec
1153: bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1154: beq.b sub_s_sclr
1155: bset.b #sign_bit,FPTEMP_EX(a6)
1156: sub_s_sclr:
1157: lea.l WBTEMP(a6),a0
1158: move.l FPTEMP(a6),(a0) ;write result to wbtemp
1159: move.l FPTEMP_HI(a6),4(a0)
1160: move.l FPTEMP_LO(a6),8(a0)
1161: tst.w FPTEMP_EX(a6)
1162: bgt sub_ckovf
1163: or.l #neg_mask,USER_FPSR(a6)
1164: sub_ckovf:
1165: move.w WBTEMP_EX(a6),d0
1166: andi.w #$7fff,d0
1167: cmpi.w #$7fff,d0
1168: bne frcfpnr
1169: *
1170: * The result has overflowed to $7fff exponent. Set I, ovfl,
1171: * and aovfl, and clr the mantissa (incorrectly set by the
1172: * round routine.)
1173: *
1174: or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6)
1175: clr.l 4(a0)
1176: bra frcfpnr
1177: *
1178: * Inst is fcmp.
1179: *
1180: wrap_cmp:
1181: cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
1182: beq fix_stk ;restore to fpu
1183: *
1184: * One of the ops is denormalized. Test for wrap condition
1185: * and complete the instruction.
1186: *
1187: cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
1188: bne.b cmp_srcd
1189: cmp_destd:
1190: bsr.l ckinf_ns
1191: bne fix_stk
1192: bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
1193: bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
1194: sub.l d1,d0 ;subtract dest from src
1195: cmp.l #$8000,d0
1196: blt fix_stk ;if less, not wrap case
1197: tst.w ETEMP_EX(a6) ;set N to ~sign_of(src)
1198: bge cmp_setn
1199: rts
1200: cmp_srcd:
1201: bsr.l ckinf_nd
1202: bne fix_stk
1203: bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
1204: bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
1205: sub.l d1,d0 ;subtract src from dest
1206: cmp.l #$8000,d0
1207: blt fix_stk ;if less, not wrap case
1208: tst.w FPTEMP_EX(a6) ;set N to sign_of(dest)
1209: blt cmp_setn
1210: rts
1211: cmp_setn:
1212: or.l #neg_mask,USER_FPSR(a6)
1213: rts
1214:
1215: *
1216: * Inst is fmul.
1217: *
1218: wrap_mul:
1219: cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
1220: beq force_unf ;force an underflow (really!)
1221: *
1222: * One of the ops is denormalized. Test for wrap condition
1223: * and complete the instruction.
1224: *
1225: cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
1226: bne.b mul_srcd
1227: mul_destd:
1228: bsr.l ckinf_ns
1229: bne fix_stk
1230: bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
1231: bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
1232: add.l d1,d0 ;subtract dest from src
1233: bgt fix_stk
1234: bra force_unf
1235: mul_srcd:
1236: bsr.l ckinf_nd
1237: bne fix_stk
1238: bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
1239: bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
1240: add.l d1,d0 ;subtract src from dest
1241: bgt fix_stk
1242:
1243: *
1244: * This code handles the case of the instruction resulting in
1245: * an underflow condition.
1246: *
1247: force_unf:
1248: bclr.b #E1,E_BYTE(a6)
1249: or.l #unfinx_mask,USER_FPSR(a6)
1250: clr.w NMNEXC(a6)
1251: clr.b WBTEMP_SGN(a6)
1252: move.w ETEMP_EX(a6),d0 ;find the sign of the result
1253: move.w FPTEMP_EX(a6),d1
1254: eor.w d1,d0
1255: andi.w #$8000,d0
1256: beq.b frcunfcont
1257: st.b WBTEMP_SGN(a6)
1258: frcunfcont:
1259: lea WBTEMP(a6),a0 ;point a0 to memory location
1260: move.w CMDREG1B(a6),d0
1261: btst.l #6,d0 ;test for forced precision
1262: beq.b frcunf_fpcr
1263: btst.l #2,d0 ;check for double
1264: bne.b frcunf_dbl
1265: move.l #$1,d0 ;inst is forced single
1266: bra.b frcunf_rnd
1267: frcunf_dbl:
1268: move.l #$2,d0 ;inst is forced double
1269: bra.b frcunf_rnd
1270: frcunf_fpcr:
1271: bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
1272: frcunf_rnd:
1273: bsr.l unf_sub ;get correct result based on
1274: * ;round precision/mode. This
1275: * ;sets FPSR_CC correctly
1276: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1277: beq.b frcfpn
1278: bset.b #sign_bit,WBTEMP_EX(a6)
1279: bra frcfpn
1280:
1281: *
1282: * Write the result to the user's fpn. All results must be HUGE to be
1283: * written; otherwise the results would have overflowed or underflowed.
1284: * If the rounding precision is single or double, the ovf_res routine
1285: * is needed to correctly supply the max value.
1286: *
1287: frcfpnr:
1288: move.w CMDREG1B(a6),d0
1289: btst.l #6,d0 ;test for forced precision
1290: beq.b frcfpn_fpcr
1291: btst.l #2,d0 ;check for double
1292: bne.b frcfpn_dbl
1293: move.l #$1,d0 ;inst is forced single
1294: bra.b frcfpn_rnd
1295: frcfpn_dbl:
1296: move.l #$2,d0 ;inst is forced double
1297: bra.b frcfpn_rnd
1298: frcfpn_fpcr:
1299: bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
1300: tst.b d0
1301: beq.b frcfpn ;if extended, write what you got
1302: frcfpn_rnd:
1303: bclr.b #sign_bit,WBTEMP_EX(a6)
1304: sne WBTEMP_SGN(a6)
1305: bsr.l ovf_res ;get correct result based on
1306: * ;round precision/mode. This
1307: * ;sets FPSR_CC correctly
1308: bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
1309: beq.b frcfpn_clr
1310: bset.b #sign_bit,WBTEMP_EX(a6)
1311: frcfpn_clr:
1312: or.l #ovfinx_mask,USER_FPSR(a6)
1313: *
1314: * Perform the write.
1315: *
1316: frcfpn:
1317: bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register
1318: cmpi.b #3,d0
1319: ble.b frc0123 ;check if dest is fp0-fp3
1320: move.l #7,d1
1321: sub.l d0,d1
1322: clr.l d0
1323: bset.l d1,d0
1324: fmovem.x WBTEMP(a6),d0
1325: rts
1326: frc0123:
1327: tst.b d0
1328: beq.b frc0_dst
1329: cmpi.b #1,d0
1330: beq.b frc1_dst
1331: cmpi.b #2,d0
1332: beq.b frc2_dst
1333: frc3_dst:
1334: move.l WBTEMP_EX(a6),USER_FP3(a6)
1335: move.l WBTEMP_HI(a6),USER_FP3+4(a6)
1336: move.l WBTEMP_LO(a6),USER_FP3+8(a6)
1337: rts
1338: frc2_dst:
1339: move.l WBTEMP_EX(a6),USER_FP2(a6)
1340: move.l WBTEMP_HI(a6),USER_FP2+4(a6)
1341: move.l WBTEMP_LO(a6),USER_FP2+8(a6)
1342: rts
1343: frc1_dst:
1344: move.l WBTEMP_EX(a6),USER_FP1(a6)
1345: move.l WBTEMP_HI(a6),USER_FP1+4(a6)
1346: move.l WBTEMP_LO(a6),USER_FP1+8(a6)
1347: rts
1348: frc0_dst:
1349: move.l WBTEMP_EX(a6),USER_FP0(a6)
1350: move.l WBTEMP_HI(a6),USER_FP0+4(a6)
1351: move.l WBTEMP_LO(a6),USER_FP0+8(a6)
1352: rts
1353:
1354: *
1355: * Write etemp to fpn.
1356: * A check is made on enabled and signalled snan exceptions,
1357: * and the destination is not overwritten if this condition exists.
1358: * This code is designed to make fmoveins of unsupported data types
1359: * faster.
1360: *
1361: wr_etemp:
1362: btst.b #snan_bit,FPSR_EXCEPT(a6) ;if snan is set, and
1363: beq.b fmoveinc ;enabled, force restore
1364: btst.b #snan_bit,FPCR_ENABLE(a6) ;and don't overwrite
1365: beq.b fmoveinc ;the dest
1366: move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
1367: * ;snan handler
1368: tst.b ETEMP(a6) ;check for negative
1369: blt.b snan_neg
1370: rts
1371: snan_neg:
1372: or.l #neg_bit,USER_FPSR(a6) ;snan is negative; set N
1373: rts
1374: fmoveinc:
1375: clr.w NMNEXC(a6)
1376: bclr.b #E1,E_BYTE(a6)
1377: move.b STAG(a6),d0 ;check if stag is inf
1378: andi.b #$e0,d0
1379: cmpi.b #$40,d0
1380: bne.b fminc_cnan
1381: or.l #inf_mask,USER_FPSR(a6) ;if inf, nothing yet has set I
1382: tst.w LOCAL_EX(a0) ;check sign
1383: bge.b fminc_con
1384: or.l #neg_mask,USER_FPSR(a6)
1385: bra fminc_con
1386: fminc_cnan:
1387: cmpi.b #$60,d0 ;check if stag is NaN
1388: bne.b fminc_czero
1389: or.l #nan_mask,USER_FPSR(a6) ;if nan, nothing yet has set NaN
1390: move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
1391: * ;snan handler
1392: tst.w LOCAL_EX(a0) ;check sign
1393: bge.b fminc_con
1394: or.l #neg_mask,USER_FPSR(a6)
1395: bra fminc_con
1396: fminc_czero:
1397: cmpi.b #$20,d0 ;check if zero
1398: bne.b fminc_con
1399: or.l #z_mask,USER_FPSR(a6) ;if zero, set Z
1400: tst.w LOCAL_EX(a0) ;check sign
1401: bge.b fminc_con
1402: or.l #neg_mask,USER_FPSR(a6)
1403: fminc_con:
1404: bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register
1405: cmpi.b #3,d0
1406: ble.b fp0123 ;check if dest is fp0-fp3
1407: move.l #7,d1
1408: sub.l d0,d1
1409: clr.l d0
1410: bset.l d1,d0
1411: fmovem.x ETEMP(a6),d0
1412: rts
1413:
1414: fp0123:
1415: tst.b d0
1416: beq.b fp0_dst
1417: cmpi.b #1,d0
1418: beq.b fp1_dst
1419: cmpi.b #2,d0
1420: beq.b fp2_dst
1421: fp3_dst:
1422: move.l ETEMP_EX(a6),USER_FP3(a6)
1423: move.l ETEMP_HI(a6),USER_FP3+4(a6)
1424: move.l ETEMP_LO(a6),USER_FP3+8(a6)
1425: rts
1426: fp2_dst:
1427: move.l ETEMP_EX(a6),USER_FP2(a6)
1428: move.l ETEMP_HI(a6),USER_FP2+4(a6)
1429: move.l ETEMP_LO(a6),USER_FP2+8(a6)
1430: rts
1431: fp1_dst:
1432: move.l ETEMP_EX(a6),USER_FP1(a6)
1433: move.l ETEMP_HI(a6),USER_FP1+4(a6)
1434: move.l ETEMP_LO(a6),USER_FP1+8(a6)
1435: rts
1436: fp0_dst:
1437: move.l ETEMP_EX(a6),USER_FP0(a6)
1438: move.l ETEMP_HI(a6),USER_FP0+4(a6)
1439: move.l ETEMP_LO(a6),USER_FP0+8(a6)
1440: rts
1441:
1442: opclass3:
1443: st.b CU_ONLY(a6)
1444: move.w CMDREG1B(a6),d0 ;check if packed moveout
1445: andi.w #$0c00,d0 ;isolate last 2 bits of size field
1446: cmpi.w #$0c00,d0 ;if size is 011 or 111, it is packed
1447: beq.w pack_out ;else it is norm or denorm
1448: bra.w mv_out
1449:
1450:
1451: *
1452: * MOVE OUT
1453: *
1454:
1455: mv_tbl:
1456: dc.l li
1457: dc.l sgp
1458: dc.l xp
1459: dc.l mvout_end ;should never be taken
1460: dc.l wi
1461: dc.l dp
1462: dc.l bi
1463: dc.l mvout_end ;should never be taken
1464: mv_out:
1465: bfextu CMDREG1B(a6){3:3},d1 ;put source specifier in d1
1466: lea.l mv_tbl,a0
1467: move.l (a0,d1*4),a0
1468: jmp (a0)
1469:
1470: *
1471: * This exit is for move-out to memory. The aunfl bit is
1472: * set if the result is inex and unfl is signalled.
1473: *
1474: mvout_end:
1475: btst.b #inex2_bit,FPSR_EXCEPT(a6)
1476: beq.b no_aufl
1477: btst.b #unfl_bit,FPSR_EXCEPT(a6)
1478: beq.b no_aufl
1479: bset.b #aunfl_bit,FPSR_AEXCEPT(a6)
1480: no_aufl:
1481: clr.w NMNEXC(a6)
1482: bclr.b #E1,E_BYTE(a6)
1483: fmove.l #0,FPSR ;clear any cc bits from res_func
1484: *
1485: * Return ETEMP to extended format from internal extended format so
1486: * that gen_except will have a correctly signed value for ovfl/unfl
1487: * handlers.
1488: *
1489: bfclr ETEMP_SGN(a6){0:8}
1490: beq.b mvout_con
1491: bset.b #sign_bit,ETEMP_EX(a6)
1492: mvout_con:
1493: rts
1494: *
1495: * This exit is for move-out to int register. The aunfl bit is
1496: * not set in any case for this move.
1497: *
1498: mvouti_end:
1499: clr.w NMNEXC(a6)
1500: bclr.b #E1,E_BYTE(a6)
1501: fmove.l #0,FPSR ;clear any cc bits from res_func
1502: *
1503: * Return ETEMP to extended format from internal extended format so
1504: * that gen_except will have a correctly signed value for ovfl/unfl
1505: * handlers.
1506: *
1507: bfclr ETEMP_SGN(a6){0:8}
1508: beq.b mvouti_con
1509: bset.b #sign_bit,ETEMP_EX(a6)
1510: mvouti_con:
1511: rts
1512: *
1513: * li is used to handle a long integer source specifier
1514: *
1515:
1516: li:
1517: moveq.l #4,d0 ;set byte count
1518:
1519: btst.b #7,STAG(a6) ;check for extended denorm
1520: bne.w int_dnrm ;if so, branch
1521:
1522: fmovem.x ETEMP(a6),fp0
1523: fcmp.d #:41dfffffffc00000,fp0
1524: * 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
1525: fbge.w lo_plrg
1526: fcmp.d #:c1e0000000000000,fp0
1527: * c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
1528: fble.w lo_nlrg
1529: *
1530: * at this point, the answer is between the largest pos and neg values
1531: *
1532: move.l USER_FPCR(a6),d1 ;use user's rounding mode
1533: andi.l #$30,d1
1534: fmove.l d1,fpcr
1535: fmove.l fp0,L_SCR1(a6) ;let the 040 perform conversion
1536: fmove.l fpsr,d1
1537: or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
1538: bra.w int_wrt
1539:
1540:
1541: lo_plrg:
1542: move.l #$7fffffff,L_SCR1(a6) ;answer is largest positive int
1543: fbeq.w int_wrt ;exact answer
1544: fcmp.d #:41dfffffffe00000,fp0
1545: * 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
1546: fbge.w int_operr ;set operr
1547: bra.w int_inx ;set inexact
1548:
1549: lo_nlrg:
1550: move.l #$80000000,L_SCR1(a6)
1551: fbeq.w int_wrt ;exact answer
1552: fcmp.d #:c1e0000000100000,fp0
1553: * c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
1554: fblt.w int_operr ;set operr
1555: bra.w int_inx ;set inexact
1556:
1557: *
1558: * wi is used to handle a word integer source specifier
1559: *
1560:
1561: wi:
1562: moveq.l #2,d0 ;set byte count
1563:
1564: btst.b #7,STAG(a6) ;check for extended denorm
1565: bne.w int_dnrm ;branch if so
1566:
1567: fmovem.x ETEMP(a6),fp0
1568: fcmp.s #:46fffe00,fp0
1569: * 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
1570: fbge.w wo_plrg
1571: fcmp.s #:c7000000,fp0
1572: * c7000000 in sgl prec = c00e00008000000000000000 in ext prec
1573: fble.w wo_nlrg
1574:
1575: *
1576: * at this point, the answer is between the largest pos and neg values
1577: *
1578: move.l USER_FPCR(a6),d1 ;use user's rounding mode
1579: andi.l #$30,d1
1580: fmove.l d1,fpcr
1581: fmove.w fp0,L_SCR1(a6) ;let the 040 perform conversion
1582: fmove.l fpsr,d1
1583: or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
1584: bra.w int_wrt
1585:
1586: wo_plrg:
1587: move.w #$7fff,L_SCR1(a6) ;answer is largest positive int
1588: fbeq.w int_wrt ;exact answer
1589: fcmp.s #:46ffff00,fp0
1590: * 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
1591: fbge.w int_operr ;set operr
1592: bra.w int_inx ;set inexact
1593:
1594: wo_nlrg:
1595: move.w #$8000,L_SCR1(a6)
1596: fbeq.w int_wrt ;exact answer
1597: fcmp.s #:c7000080,fp0
1598: * c7000080 in sgl prec = c00e00008000800000000000 in ext prec
1599: fblt.w int_operr ;set operr
1600: bra.w int_inx ;set inexact
1601:
1602: *
1603: * bi is used to handle a byte integer source specifier
1604: *
1605:
1606: bi:
1607: moveq.l #1,d0 ;set byte count
1608:
1609: btst.b #7,STAG(a6) ;check for extended denorm
1610: bne.w int_dnrm ;branch if so
1611:
1612: fmovem.x ETEMP(a6),fp0
1613: fcmp.s #:42fe0000,fp0
1614: * 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
1615: fbge.w by_plrg
1616: fcmp.s #:c3000000,fp0
1617: * c3000000 in sgl prec = c00600008000000000000000 in ext prec
1618: fble.w by_nlrg
1619:
1620: *
1621: * at this point, the answer is between the largest pos and neg values
1622: *
1623: move.l USER_FPCR(a6),d1 ;use user's rounding mode
1624: andi.l #$30,d1
1625: fmove.l d1,fpcr
1626: fmove.b fp0,L_SCR1(a6) ;let the 040 perform conversion
1627: fmove.l fpsr,d1
1628: or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
1629: bra.w int_wrt
1630:
1631: by_plrg:
1632: move.b #$7f,L_SCR1(a6) ;answer is largest positive int
1633: fbeq.w int_wrt ;exact answer
1634: fcmp.s #:42ff0000,fp0
1635: * 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
1636: fbge.w int_operr ;set operr
1637: bra.w int_inx ;set inexact
1638:
1639: by_nlrg:
1640: move.b #$80,L_SCR1(a6)
1641: fbeq.w int_wrt ;exact answer
1642: fcmp.s #:c3008000,fp0
1643: * c3008000 in sgl prec = c00600008080000000000000 in ext prec
1644: fblt.w int_operr ;set operr
1645: bra.w int_inx ;set inexact
1646:
1647: *
1648: * Common integer routines
1649: *
1650: * int_drnrm---account for possible nonzero result for round up with positive
1651: * operand and round down for negative answer. In the first case (result = 1)
1652: * byte-width (store in d0) of result must be honored. In the second case,
1653: * -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
1654:
1655: int_dnrm:
1656: clr.l L_SCR1(a6) ; initialize result to 0
1657: bfextu FPCR_MODE(a6){2:2},d1 ; d1 is the rounding mode
1658: cmp.b #2,d1
1659: bmi.b int_inx ; if RN or RZ, done
1660: bne.b int_rp ; if RP, continue below
1661: tst.w ETEMP(a6) ; RM: store -1 in L_SCR1 if src is negative
1662: bpl.b int_inx ; otherwise result is 0
1663: move.l #-1,L_SCR1(a6)
1664: bra.b int_inx
1665: int_rp:
1666: tst.w ETEMP(a6) ; RP: store +1 of proper width in L_SCR1 if
1667: * ; source is greater than 0
1668: bmi.b int_inx ; otherwise, result is 0
1669: lea L_SCR1(a6),a1 ; a1 is address of L_SCR1
1670: adda.l d0,a1 ; offset by destination width -1
1671: suba.l #1,a1
1672: bset.b #0,(a1) ; set low bit at a1 address
1673: int_inx:
1674: ori.l #inx2a_mask,USER_FPSR(a6)
1675: bra.b int_wrt
1676: int_operr:
1677: fmovem.x fp0,FPTEMP(a6) ;FPTEMP must contain the extended
1678: * ;precision source that needs to be
1679: * ;converted to integer this is required
1680: * ;if the operr exception is enabled.
1681: * ;set operr/aiop (no inex2 on int ovfl)
1682:
1683: ori.l #opaop_mask,USER_FPSR(a6)
1684: * ;fall through to perform int_wrt
1685: int_wrt:
1686: move.l EXC_EA(a6),a1 ;load destination address
1687: tst.l a1 ;check to see if it is a dest register
1688: beq.b wrt_dn ;write data register
1689: lea L_SCR1(a6),a0 ;point to supervisor source address
1690: bsr.l mem_write
1691: bra.w mvouti_end
1692:
1693: wrt_dn:
1694: move.l d0,-(sp) ;d0 currently contains the size to write
1695: bsr.l get_fline ;get_fline returns Dn in d0
1696: andi.w #$7,d0 ;isolate register
1697: move.l (sp)+,d1 ;get size
1698: cmpi.l #4,d1 ;most frequent case
1699: beq.b sz_long
1700: cmpi.l #2,d1
1701: bne.b sz_con
1702: or.l #8,d0 ;add 'word' size to register#
1703: bra.b sz_con
1704: sz_long:
1705: or.l #$10,d0 ;add 'long' size to register#
1706: sz_con:
1707: move.l d0,d1 ;reg_dest expects size:reg in d1
1708: bsr.l reg_dest ;load proper data register
1709: bra.w mvouti_end
1710: xp:
1711: lea ETEMP(a6),a0
1712: bclr.b #sign_bit,LOCAL_EX(a0)
1713: sne LOCAL_SGN(a0)
1714: btst.b #7,STAG(a6) ;check for extended denorm
1715: bne.w xdnrm
1716: clr.l d0
1717: bra.b do_fp ;do normal case
1718: sgp:
1719: lea ETEMP(a6),a0
1720: bclr.b #sign_bit,LOCAL_EX(a0)
1721: sne LOCAL_SGN(a0)
1722: btst.b #7,STAG(a6) ;check for extended denorm
1723: bne.w sp_catas ;branch if so
1724: move.w LOCAL_EX(a0),d0
1725: lea sp_bnds,a1
1726: cmp.w (a1),d0
1727: blt.w sp_under
1728: cmp.w 2(a1),d0
1729: bgt.w sp_over
1730: move.l #1,d0 ;set destination format to single
1731: bra.b do_fp ;do normal case
1732: dp:
1733: lea ETEMP(a6),a0
1734: bclr.b #sign_bit,LOCAL_EX(a0)
1735: sne LOCAL_SGN(a0)
1736:
1737: btst.b #7,STAG(a6) ;check for extended denorm
1738: bne.w dp_catas ;branch if so
1739:
1740: move.w LOCAL_EX(a0),d0
1741: lea dp_bnds,a1
1742:
1743: cmp.w (a1),d0
1744: blt.w dp_under
1745: cmp.w 2(a1),d0
1746: bgt.w dp_over
1747:
1748: move.l #2,d0 ;set destination format to double
1749: * ;fall through to do_fp
1750: *
1751: do_fp:
1752: bfextu FPCR_MODE(a6){2:2},d1 ;rnd mode in d1
1753: swap d0 ;rnd prec in upper word
1754: add.l d0,d1 ;d1 has PREC/MODE info
1755:
1756: clr.l d0 ;clear g,r,s
1757:
1758: bsr.l round ;round
1759:
1760: move.l a0,a1
1761: move.l EXC_EA(a6),a0
1762:
1763: bfextu CMDREG1B(a6){3:3},d1 ;extract destination format
1764: * ;at this point only the dest
1765: * ;formats sgl, dbl, ext are
1766: * ;possible
1767: cmp.b #2,d1
1768: bgt.b ddbl ;double=5, extended=2, single=1
1769: bne.b dsgl
1770: * ;fall through to dext
1771: dext:
1772: bsr.l dest_ext
1773: bra.w mvout_end
1774: dsgl:
1775: bsr.l dest_sgl
1776: bra.w mvout_end
1777: ddbl:
1778: bsr.l dest_dbl
1779: bra.w mvout_end
1780:
1781: *
1782: * Handle possible denorm or catastrophic underflow cases here
1783: *
1784: xdnrm:
1785: bsr.w set_xop ;initialize WBTEMP
1786: bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
1787:
1788: move.l a0,a1
1789: move.l EXC_EA(a6),a0 ;a0 has the destination pointer
1790: bsr.l dest_ext ;store to memory
1791: bset.b #unfl_bit,FPSR_EXCEPT(a6)
1792: bra.w mvout_end
1793:
1794: sp_under:
1795: bset.b #etemp15_bit,STAG(a6)
1796:
1797: cmp.w 4(a1),d0
1798: blt.b sp_catas ;catastrophic underflow case
1799:
1800: move.l #1,d0 ;load in round precision
1801: move.l #sgl_thresh,d1 ;load in single denorm threshold
1802: bsr.l dpspdnrm ;expects d1 to have the proper
1803: * ;denorm threshold
1804: bsr.l dest_sgl ;stores value to destination
1805: bset.b #unfl_bit,FPSR_EXCEPT(a6)
1806: bra.w mvout_end ;exit
1807:
1808: dp_under:
1809: bset.b #etemp15_bit,STAG(a6)
1810:
1811: cmp.w 4(a1),d0
1812: blt.b dp_catas ;catastrophic underflow case
1813:
1814: move.l #dbl_thresh,d1 ;load in double precision threshold
1815: move.l #2,d0
1816: bsr.l dpspdnrm ;expects d1 to have proper
1817: * ;denorm threshold
1818: * ;expects d0 to have round precision
1819: bsr.l dest_dbl ;store value to destination
1820: bset.b #unfl_bit,FPSR_EXCEPT(a6)
1821: bra.w mvout_end ;exit
1822:
1823: *
1824: * Handle catastrophic underflow cases here
1825: *
1826: sp_catas:
1827: * Temp fix for z bit set in unf_sub
1828: move.l USER_FPSR(a6),-(a7)
1829:
1830: move.l #1,d0 ;set round precision to sgl
1831:
1832: bsr.l unf_sub ;a0 points to result
1833:
1834: move.l (a7)+,USER_FPSR(a6)
1835:
1836: move.l #1,d0
1837: sub.w d0,LOCAL_EX(a0) ;account for difference between
1838: * ;denorm/norm bias
1839:
1840: move.l a0,a1 ;a1 has the operand input
1841: move.l EXC_EA(a6),a0 ;a0 has the destination pointer
1842:
1843: bsr.l dest_sgl ;store the result
1844: ori.l #unfinx_mask,USER_FPSR(a6)
1845: bra.w mvout_end
1846:
1847: dp_catas:
1848: * Temp fix for z bit set in unf_sub
1849: move.l USER_FPSR(a6),-(a7)
1850:
1851: move.l #2,d0 ;set round precision to dbl
1852: bsr.l unf_sub ;a0 points to result
1853:
1854: move.l (a7)+,USER_FPSR(a6)
1855:
1856: move.l #1,d0
1857: sub.w d0,LOCAL_EX(a0) ;account for difference between
1858: * ;denorm/norm bias
1859:
1860: move.l a0,a1 ;a1 has the operand input
1861: move.l EXC_EA(a6),a0 ;a0 has the destination pointer
1862:
1863: bsr.l dest_dbl ;store the result
1864: ori.l #unfinx_mask,USER_FPSR(a6)
1865: bra.w mvout_end
1866:
1867: *
1868: * Handle catastrophic overflow cases here
1869: *
1870: sp_over:
1871: * Temp fix for z bit set in unf_sub
1872: move.l USER_FPSR(a6),-(a7)
1873:
1874: move.l #1,d0
1875: lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result
1876: move.l ETEMP_EX(a6),(a0)
1877: move.l ETEMP_HI(a6),4(a0)
1878: move.l ETEMP_LO(a6),8(a0)
1879: bsr.l ovf_res
1880:
1881: move.l (a7)+,USER_FPSR(a6)
1882:
1883: move.l a0,a1
1884: move.l EXC_EA(a6),a0
1885: bsr.l dest_sgl
1886: or.l #ovfinx_mask,USER_FPSR(a6)
1887: bra.w mvout_end
1888:
1889: dp_over:
1890: * Temp fix for z bit set in ovf_res
1891: move.l USER_FPSR(a6),-(a7)
1892:
1893: move.l #2,d0
1894: lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result
1895: move.l ETEMP_EX(a6),(a0)
1896: move.l ETEMP_HI(a6),4(a0)
1897: move.l ETEMP_LO(a6),8(a0)
1898: bsr.l ovf_res
1899:
1900: move.l (a7)+,USER_FPSR(a6)
1901:
1902: move.l a0,a1
1903: move.l EXC_EA(a6),a0
1904: bsr.l dest_dbl
1905: or.l #ovfinx_mask,USER_FPSR(a6)
1906: bra.w mvout_end
1907:
1908: *
1909: * DPSPDNRM
1910: *
1911: * This subroutine takes an extended normalized number and denormalizes
1912: * it to the given round precision. This subroutine also decrements
1913: * the input operand's exponent by 1 to account for the fact that
1914: * dest_sgl or dest_dbl expects a normalized number's bias.
1915: *
1916: * Input: a0 points to a normalized number in internal extended format
1917: * d0 is the round precision (=1 for sgl; =2 for dbl)
1918: * d1 is the single precision or double precision
1919: * denorm threshold
1920: *
1921: * Output: (In the format for dest_sgl or dest_dbl)
1922: * a0 points to the destination
1923: * a1 points to the operand
1924: *
1925: * Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
1926: *
1927: dpspdnrm:
1928: move.l d0,-(a7) ;save round precision
1929: clr.l d0 ;clear initial g,r,s
1930: bsr.l dnrm_lp ;careful with d0, it's needed by round
1931:
1932: bfextu FPCR_MODE(a6){2:2},d1 ;get rounding mode
1933: swap d1
1934: move.w 2(a7),d1 ;set rounding precision
1935: swap d1 ;at this point d1 has PREC/MODE info
1936: bsr.l round ;round result, sets the inex bit in
1937: * ;USER_FPSR if needed
1938:
1939: move.w #1,d0
1940: sub.w d0,LOCAL_EX(a0) ;account for difference in denorm
1941: * ;vs norm bias
1942:
1943: move.l a0,a1 ;a1 has the operand input
1944: move.l EXC_EA(a6),a0 ;a0 has the destination pointer
1945: addq.l #4,a7 ;pop stack
1946: rts
1947: *
1948: * SET_XOP initialized WBTEMP with the value pointed to by a0
1949: * input: a0 points to input operand in the internal extended format
1950: *
1951: set_xop:
1952: move.l LOCAL_EX(a0),WBTEMP_EX(a6)
1953: move.l LOCAL_HI(a0),WBTEMP_HI(a6)
1954: move.l LOCAL_LO(a0),WBTEMP_LO(a6)
1955: bfclr WBTEMP_SGN(a6){0:8}
1956: beq.b sxop
1957: bset.b #sign_bit,WBTEMP_EX(a6)
1958: sxop:
1959: bfclr STAG(a6){5:4} ;clear wbtm66,wbtm1,wbtm0,sbit
1960: rts
1961: *
1962: * P_MOVE
1963: *
1964: p_movet:
1965: dc.l p_move
1966: dc.l p_movez
1967: dc.l p_movei
1968: dc.l p_moven
1969: dc.l p_move
1970: p_regd:
1971: dc.l p_dyd0
1972: dc.l p_dyd1
1973: dc.l p_dyd2
1974: dc.l p_dyd3
1975: dc.l p_dyd4
1976: dc.l p_dyd5
1977: dc.l p_dyd6
1978: dc.l p_dyd7
1979:
1980: pack_out:
1981: lea.l p_movet,a0 ;load jmp table address
1982: move.w STAG(a6),d0 ;get source tag
1983: bfextu d0{16:3},d0 ;isolate source bits
1984: move.l (a0,d0.w*4),a0 ;load a0 with routine label for tag
1985: jmp (a0) ;go to the routine
1986:
1987: p_write:
1988: move.l #$0c,d0 ;get byte count
1989: move.l EXC_EA(a6),a1 ;get the destination address
1990: bsr mem_write ;write the user's destination
1991: clr.b CU_SAVEPC(a6) ;set the cu save pc to all 0's
1992:
1993: *
1994: * Also note that the dtag must be set to norm here - this is because
1995: * the 040 uses the dtag to execute the correct microcode.
1996: *
1997: bfclr DTAG(a6){0:3} ;set dtag to norm
1998:
1999: rts
2000:
2001: * Notes on handling of special case (zero, inf, and nan) inputs:
2002: * 1. Operr is not signalled if the k-factor is greater than 18.
2003: * 2. Per the manual, status bits are not set.
2004: *
2005:
2006: p_move:
2007: move.w CMDREG1B(a6),d0
2008: btst.l #kfact_bit,d0 ;test for dynamic k-factor
2009: beq.b statick ;if clear, k-factor is static
2010: dynamick:
2011: bfextu d0{25:3},d0 ;isolate register for dynamic k-factor
2012: lea p_regd,a0
2013: move.l (a0,d0*4),a0
2014: jmp (a0)
2015: statick:
2016: andi.w #$007f,d0 ;get k-factor
2017: bfexts d0{25:7},d0 ;sign extend d0 for bindec
2018: lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
2019: bsr.l bindec ;perform the convert; data at a6
2020: lea.l FP_SCR1(a6),a0 ;load a0 with result address
2021: bra.l p_write
2022: p_movez:
2023: lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
2024: clr.w 2(a0) ;clear lower word of exp
2025: clr.l 4(a0) ;load second lword of ZERO
2026: clr.l 8(a0) ;load third lword of ZERO
2027: bra.w p_write ;go write results
2028: p_movei:
2029: fmove.l #0,FPSR ;clear aiop
2030: lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
2031: clr.w 2(a0) ;clear lower word of exp
2032: bra.w p_write ;go write the result
2033: p_moven:
2034: lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
2035: clr.w 2(a0) ;clear lower word of exp
2036: bra.w p_write ;go write the result
2037:
2038: *
2039: * Routines to read the dynamic k-factor from Dn.
2040: *
2041: p_dyd0:
2042: move.l USER_D0(a6),d0
2043: bra.b statick
2044: p_dyd1:
2045: move.l USER_D1(a6),d0
2046: bra.b statick
2047: p_dyd2:
2048: move.l d2,d0
2049: bra.b statick
2050: p_dyd3:
2051: move.l d3,d0
2052: bra.b statick
2053: p_dyd4:
2054: move.l d4,d0
2055: bra.b statick
2056: p_dyd5:
2057: move.l d5,d0
2058: bra.b statick
2059: p_dyd6:
2060: move.l d6,d0
2061: bra.w statick
2062: p_dyd7:
2063: move.l d7,d0
2064: bra.w statick
2065:
2066: end
CVSweb