Annotation of sys/arch/m68k/060sp/pfpsp.s, Revision 1.1.1.1
1.1 nbrk 1: #
2: # $OpenBSD: pfpsp.s,v 1.8 2007/04/10 17:47:54 miod Exp $
3: # $NetBSD: pfpsp.s,v 1.2 1996/05/15 19:49:12 is Exp $
4: #
5:
6: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7: # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
8: # M68000 Hi-Performance Microprocessor Division
9: # M68060 Software Package Production Release
10: #
11: # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
12: # All rights reserved.
13: #
14: # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
15: # To the maximum extent permitted by applicable law,
16: # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
17: # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
18: # FOR A PARTICULAR PURPOSE and any warranty against infringement with
19: # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
20: # and any accompanying written materials.
21: #
22: # To the maximum extent permitted by applicable law,
23: # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
24: # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
25: # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
26: # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27: #
28: # Motorola assumes no responsibility for the maintenance and support
29: # of the SOFTWARE.
30: #
31: # You are hereby granted a copyright license to use, modify, and distribute the
32: # SOFTWARE so long as this entire notice is retained without alteration
33: # in any modified and/or redistributed versions, and that such modified
34: # versions are clearly identified as such.
35: # No licenses are granted by implication, estoppel or otherwise under any
36: # patents or trademarks of Motorola, Inc.
37: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38:
39: #
40: # freal.s:
41: # This file is appended to the top of the 060FPSP package
42: # and contains the entry points into the package. The user, in
43: # effect, branches to one of the branch table entries located
44: # after _060FPSP_TABLE.
45: # Also, subroutine stubs exist in this file (_fpsp_done for
46: # example) that are referenced by the FPSP package itself in order
47: # to call a given routine. The stub routine actually performs the
48: # callout. The FPSP code does a "bsr" to the stub routine. This
49: # extra layer of hierarchy adds a slight performance penalty but
50: # it makes the FPSP code easier to read and more maintainable.
51: #
52:
53: set _off_bsun, 0x00
54: set _off_snan, 0x04
55: set _off_operr, 0x08
56: set _off_ovfl, 0x0c
57: set _off_unfl, 0x10
58: set _off_dz, 0x14
59: set _off_inex, 0x18
60: set _off_fline, 0x1c
61: set _off_fpu_dis, 0x20
62: set _off_trap, 0x24
63: set _off_trace, 0x28
64: set _off_access, 0x2c
65: set _off_done, 0x30
66:
67: set _off_imr, 0x40
68: set _off_dmr, 0x44
69: set _off_dmw, 0x48
70: set _off_irw, 0x4c
71: set _off_irl, 0x50
72: set _off_drb, 0x54
73: set _off_drw, 0x58
74: set _off_drl, 0x5c
75: set _off_dwb, 0x60
76: set _off_dww, 0x64
77: set _off_dwl, 0x68
78:
79: _060FPSP_TABLE:
80:
81: ###############################################################
82:
83: # Here's the table of ENTRY POINTS for those linking the package.
84: bra.l _fpsp_snan
85: short 0x0000
86: bra.l _fpsp_operr
87: short 0x0000
88: bra.l _fpsp_ovfl
89: short 0x0000
90: bra.l _fpsp_unfl
91: short 0x0000
92: bra.l _fpsp_dz
93: short 0x0000
94: bra.l _fpsp_inex
95: short 0x0000
96: bra.l _fpsp_fline
97: short 0x0000
98: bra.l _fpsp_unsupp
99: short 0x0000
100: bra.l _fpsp_effadd
101: short 0x0000
102:
103: space 56
104:
105: ###############################################################
106: global _fpsp_done
107: _fpsp_done:
108: mov.l %d0,-(%sp)
109: mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
110: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
111: mov.l 0x4(%sp),%d0
112: rtd &0x4
113:
114: global _real_ovfl
115: _real_ovfl:
116: mov.l %d0,-(%sp)
117: mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
118: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
119: mov.l 0x4(%sp),%d0
120: rtd &0x4
121:
122: global _real_unfl
123: _real_unfl:
124: mov.l %d0,-(%sp)
125: mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
126: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
127: mov.l 0x4(%sp),%d0
128: rtd &0x4
129:
130: global _real_inex
131: _real_inex:
132: mov.l %d0,-(%sp)
133: mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
134: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
135: mov.l 0x4(%sp),%d0
136: rtd &0x4
137:
138: global _real_bsun
139: _real_bsun:
140: mov.l %d0,-(%sp)
141: mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
142: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
143: mov.l 0x4(%sp),%d0
144: rtd &0x4
145:
146: global _real_operr
147: _real_operr:
148: mov.l %d0,-(%sp)
149: mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
150: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
151: mov.l 0x4(%sp),%d0
152: rtd &0x4
153:
154: global _real_snan
155: _real_snan:
156: mov.l %d0,-(%sp)
157: mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
158: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
159: mov.l 0x4(%sp),%d0
160: rtd &0x4
161:
162: global _real_dz
163: _real_dz:
164: mov.l %d0,-(%sp)
165: mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
166: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
167: mov.l 0x4(%sp),%d0
168: rtd &0x4
169:
170: global _real_fline
171: _real_fline:
172: mov.l %d0,-(%sp)
173: mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
174: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
175: mov.l 0x4(%sp),%d0
176: rtd &0x4
177:
178: global _real_fpu_disabled
179: _real_fpu_disabled:
180: mov.l %d0,-(%sp)
181: mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
182: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
183: mov.l 0x4(%sp),%d0
184: rtd &0x4
185:
186: global _real_trap
187: _real_trap:
188: mov.l %d0,-(%sp)
189: mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
190: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
191: mov.l 0x4(%sp),%d0
192: rtd &0x4
193:
194: global _real_trace
195: _real_trace:
196: mov.l %d0,-(%sp)
197: mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
198: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
199: mov.l 0x4(%sp),%d0
200: rtd &0x4
201:
202: global _real_access
203: _real_access:
204: mov.l %d0,-(%sp)
205: mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
206: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
207: mov.l 0x4(%sp),%d0
208: rtd &0x4
209:
210: #######################################
211:
212: global _imem_read
213: _imem_read:
214: mov.l %d0,-(%sp)
215: mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
216: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
217: mov.l 0x4(%sp),%d0
218: rtd &0x4
219:
220: global _dmem_read
221: _dmem_read:
222: mov.l %d0,-(%sp)
223: mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
224: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
225: mov.l 0x4(%sp),%d0
226: rtd &0x4
227:
228: global _dmem_write
229: _dmem_write:
230: mov.l %d0,-(%sp)
231: mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
232: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
233: mov.l 0x4(%sp),%d0
234: rtd &0x4
235:
236: global _imem_read_word
237: _imem_read_word:
238: mov.l %d0,-(%sp)
239: mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
240: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
241: mov.l 0x4(%sp),%d0
242: rtd &0x4
243:
244: global _imem_read_long
245: _imem_read_long:
246: mov.l %d0,-(%sp)
247: mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
248: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
249: mov.l 0x4(%sp),%d0
250: rtd &0x4
251:
252: global _dmem_read_byte
253: _dmem_read_byte:
254: mov.l %d0,-(%sp)
255: mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
256: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
257: mov.l 0x4(%sp),%d0
258: rtd &0x4
259:
260: global _dmem_read_word
261: _dmem_read_word:
262: mov.l %d0,-(%sp)
263: mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
264: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
265: mov.l 0x4(%sp),%d0
266: rtd &0x4
267:
268: global _dmem_read_long
269: _dmem_read_long:
270: mov.l %d0,-(%sp)
271: mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
272: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
273: mov.l 0x4(%sp),%d0
274: rtd &0x4
275:
276: global _dmem_write_byte
277: _dmem_write_byte:
278: mov.l %d0,-(%sp)
279: mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
280: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
281: mov.l 0x4(%sp),%d0
282: rtd &0x4
283:
284: global _dmem_write_word
285: _dmem_write_word:
286: mov.l %d0,-(%sp)
287: mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
288: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
289: mov.l 0x4(%sp),%d0
290: rtd &0x4
291:
292: global _dmem_write_long
293: _dmem_write_long:
294: mov.l %d0,-(%sp)
295: mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
296: pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
297: mov.l 0x4(%sp),%d0
298: rtd &0x4
299:
300: #
301: # This file contains a set of define statements for constants
302: # in order to promote readability within the corecode itself.
303: #
304:
305: set LOCAL_SIZE, 192 # stack frame size(bytes)
306: set LV, -LOCAL_SIZE # stack offset
307:
308: set EXC_SR, 0x4 # stack status register
309: set EXC_PC, 0x6 # stack pc
310: set EXC_VOFF, 0xa # stacked vector offset
311: set EXC_EA, 0xc # stacked <ea>
312:
313: set EXC_FP, 0x0 # frame pointer
314:
315: set EXC_AREGS, -68 # offset of all address regs
316: set EXC_DREGS, -100 # offset of all data regs
317: set EXC_FPREGS, -36 # offset of all fp regs
318:
319: set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
320: set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
321: set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
322: set EXC_A5, EXC_AREGS+(5*4)
323: set EXC_A4, EXC_AREGS+(4*4)
324: set EXC_A3, EXC_AREGS+(3*4)
325: set EXC_A2, EXC_AREGS+(2*4)
326: set EXC_A1, EXC_AREGS+(1*4)
327: set EXC_A0, EXC_AREGS+(0*4)
328: set EXC_D7, EXC_DREGS+(7*4)
329: set EXC_D6, EXC_DREGS+(6*4)
330: set EXC_D5, EXC_DREGS+(5*4)
331: set EXC_D4, EXC_DREGS+(4*4)
332: set EXC_D3, EXC_DREGS+(3*4)
333: set EXC_D2, EXC_DREGS+(2*4)
334: set EXC_D1, EXC_DREGS+(1*4)
335: set EXC_D0, EXC_DREGS+(0*4)
336:
337: set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
338: set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
339: set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
340:
341: set FP_SCR1, LV+80 # fp scratch 1
342: set FP_SCR1_EX, FP_SCR1+0
343: set FP_SCR1_SGN, FP_SCR1+2
344: set FP_SCR1_HI, FP_SCR1+4
345: set FP_SCR1_LO, FP_SCR1+8
346:
347: set FP_SCR0, LV+68 # fp scratch 0
348: set FP_SCR0_EX, FP_SCR0+0
349: set FP_SCR0_SGN, FP_SCR0+2
350: set FP_SCR0_HI, FP_SCR0+4
351: set FP_SCR0_LO, FP_SCR0+8
352:
353: set FP_DST, LV+56 # fp destination operand
354: set FP_DST_EX, FP_DST+0
355: set FP_DST_SGN, FP_DST+2
356: set FP_DST_HI, FP_DST+4
357: set FP_DST_LO, FP_DST+8
358:
359: set FP_SRC, LV+44 # fp source operand
360: set FP_SRC_EX, FP_SRC+0
361: set FP_SRC_SGN, FP_SRC+2
362: set FP_SRC_HI, FP_SRC+4
363: set FP_SRC_LO, FP_SRC+8
364:
365: set USER_FPIAR, LV+40 # FP instr address register
366:
367: set USER_FPSR, LV+36 # FP status register
368: set FPSR_CC, USER_FPSR+0 # FPSR condition codes
369: set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte
370: set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
371: set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
372:
373: set USER_FPCR, LV+32 # FP control register
374: set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
375: set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
376:
377: set L_SCR3, LV+28 # integer scratch 3
378: set L_SCR2, LV+24 # integer scratch 2
379: set L_SCR1, LV+20 # integer scratch 1
380:
381: set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
382:
383: set EXC_TEMP2, LV+24 # temporary space
384: set EXC_TEMP, LV+16 # temporary space
385:
386: set DTAG, LV+15 # destination operand type
387: set STAG, LV+14 # source operand type
388:
389: set SPCOND_FLG, LV+10 # flag: special case (see below)
390:
391: set EXC_CC, LV+8 # saved condition codes
392: set EXC_EXTWPTR, LV+4 # saved current PC (active)
393: set EXC_EXTWORD, LV+2 # saved extension word
394: set EXC_CMDREG, LV+2 # saved extension word
395: set EXC_OPWORD, LV+0 # saved operation word
396:
397: ################################
398:
399: # Helpful macros
400:
401: set FTEMP, 0 # offsets within an
402: set FTEMP_EX, 0 # extended precision
403: set FTEMP_SGN, 2 # value saved in memory.
404: set FTEMP_HI, 4
405: set FTEMP_LO, 8
406: set FTEMP_GRS, 12
407:
408: set LOCAL, 0 # offsets within an
409: set LOCAL_EX, 0 # extended precision
410: set LOCAL_SGN, 2 # value saved in memory.
411: set LOCAL_HI, 4
412: set LOCAL_LO, 8
413: set LOCAL_GRS, 12
414:
415: set DST, 0 # offsets within an
416: set DST_EX, 0 # extended precision
417: set DST_HI, 4 # value saved in memory.
418: set DST_LO, 8
419:
420: set SRC, 0 # offsets within an
421: set SRC_EX, 0 # extended precision
422: set SRC_HI, 4 # value saved in memory.
423: set SRC_LO, 8
424:
425: set SGL_LO, 0x3f81 # min sgl prec exponent
426: set SGL_HI, 0x407e # max sgl prec exponent
427: set DBL_LO, 0x3c01 # min dbl prec exponent
428: set DBL_HI, 0x43fe # max dbl prec exponent
429: set EXT_LO, 0x0 # min ext prec exponent
430: set EXT_HI, 0x7ffe # max ext prec exponent
431:
432: set EXT_BIAS, 0x3fff # extended precision bias
433: set SGL_BIAS, 0x007f # single precision bias
434: set DBL_BIAS, 0x03ff # double precision bias
435:
436: set NORM, 0x00 # operand type for STAG/DTAG
437: set ZERO, 0x01 # operand type for STAG/DTAG
438: set INF, 0x02 # operand type for STAG/DTAG
439: set QNAN, 0x03 # operand type for STAG/DTAG
440: set DENORM, 0x04 # operand type for STAG/DTAG
441: set SNAN, 0x05 # operand type for STAG/DTAG
442: set UNNORM, 0x06 # operand type for STAG/DTAG
443:
444: ##################
445: # FPSR/FPCR bits #
446: ##################
447: set neg_bit, 0x3 # negative result
448: set z_bit, 0x2 # zero result
449: set inf_bit, 0x1 # infinite result
450: set nan_bit, 0x0 # NAN result
451:
452: set q_sn_bit, 0x7 # sign bit of quotient byte
453:
454: set bsun_bit, 7 # branch on unordered
455: set snan_bit, 6 # signalling NAN
456: set operr_bit, 5 # operand error
457: set ovfl_bit, 4 # overflow
458: set unfl_bit, 3 # underflow
459: set dz_bit, 2 # divide by zero
460: set inex2_bit, 1 # inexact result 2
461: set inex1_bit, 0 # inexact result 1
462:
463: set aiop_bit, 7 # accrued inexact operation bit
464: set aovfl_bit, 6 # accrued overflow bit
465: set aunfl_bit, 5 # accrued underflow bit
466: set adz_bit, 4 # accrued dz bit
467: set ainex_bit, 3 # accrued inexact bit
468:
469: #############################
470: # FPSR individual bit masks #
471: #############################
472: set neg_mask, 0x08000000 # negative bit mask (lw)
473: set inf_mask, 0x02000000 # infinity bit mask (lw)
474: set z_mask, 0x04000000 # zero bit mask (lw)
475: set nan_mask, 0x01000000 # nan bit mask (lw)
476:
477: set neg_bmask, 0x08 # negative bit mask (byte)
478: set inf_bmask, 0x02 # infinity bit mask (byte)
479: set z_bmask, 0x04 # zero bit mask (byte)
480: set nan_bmask, 0x01 # nan bit mask (byte)
481:
482: set bsun_mask, 0x00008000 # bsun exception mask
483: set snan_mask, 0x00004000 # snan exception mask
484: set operr_mask, 0x00002000 # operr exception mask
485: set ovfl_mask, 0x00001000 # overflow exception mask
486: set unfl_mask, 0x00000800 # underflow exception mask
487: set dz_mask, 0x00000400 # dz exception mask
488: set inex2_mask, 0x00000200 # inex2 exception mask
489: set inex1_mask, 0x00000100 # inex1 exception mask
490:
491: set aiop_mask, 0x00000080 # accrued illegal operation
492: set aovfl_mask, 0x00000040 # accrued overflow
493: set aunfl_mask, 0x00000020 # accrued underflow
494: set adz_mask, 0x00000010 # accrued divide by zero
495: set ainex_mask, 0x00000008 # accrued inexact
496:
497: ######################################
498: # FPSR combinations used in the FPSP #
499: ######################################
500: set dzinf_mask, inf_mask+dz_mask+adz_mask
501: set opnan_mask, nan_mask+operr_mask+aiop_mask
502: set nzi_mask, 0x01ffffff #clears N, Z, and I
503: set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
504: set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
505: set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
506: set inx1a_mask, inex1_mask+ainex_mask
507: set inx2a_mask, inex2_mask+ainex_mask
508: set snaniop_mask, nan_mask+snan_mask+aiop_mask
509: set snaniop2_mask, snan_mask+aiop_mask
510: set naniop_mask, nan_mask+aiop_mask
511: set neginf_mask, neg_mask+inf_mask
512: set infaiop_mask, inf_mask+aiop_mask
513: set negz_mask, neg_mask+z_mask
514: set opaop_mask, operr_mask+aiop_mask
515: set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
516: set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
517:
518: #########
519: # misc. #
520: #########
521: set rnd_stky_bit, 29 # stky bit pos in longword
522:
523: set sign_bit, 0x7 # sign bit
524: set signan_bit, 0x6 # signalling nan bit
525:
526: set sgl_thresh, 0x3f81 # minimum sgl exponent
527: set dbl_thresh, 0x3c01 # minimum dbl exponent
528:
529: set x_mode, 0x0 # extended precision
530: set s_mode, 0x4 # single precision
531: set d_mode, 0x8 # double precision
532:
533: set rn_mode, 0x0 # round-to-nearest
534: set rz_mode, 0x1 # round-to-zero
535: set rm_mode, 0x2 # round-tp-minus-infinity
536: set rp_mode, 0x3 # round-to-plus-infinity
537:
538: set mantissalen, 64 # length of mantissa in bits
539:
540: set BYTE, 1 # len(byte) == 1 byte
541: set WORD, 2 # len(word) == 2 bytes
542: set LONG, 4 # len(longword) == 2 bytes
543:
544: set BSUN_VEC, 0xc0 # bsun vector offset
545: set INEX_VEC, 0xc4 # inexact vector offset
546: set DZ_VEC, 0xc8 # dz vector offset
547: set UNFL_VEC, 0xcc # unfl vector offset
548: set OPERR_VEC, 0xd0 # operr vector offset
549: set OVFL_VEC, 0xd4 # ovfl vector offset
550: set SNAN_VEC, 0xd8 # snan vector offset
551:
552: ###########################
553: # SPecial CONDition FLaGs #
554: ###########################
555: set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
556: set fbsun_flg, 0x02 # flag bit: bsun exception
557: set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
558: set mda7_flg, 0x08 # flag bit: -(a7) <ea>
559: set fmovm_flg, 0x40 # flag bit: fmovm instruction
560: set immed_flg, 0x80 # flag bit: &<data> <ea>
561:
562: set ftrapcc_bit, 0x0
563: set fbsun_bit, 0x1
564: set mia7_bit, 0x2
565: set mda7_bit, 0x3
566: set immed_bit, 0x7
567:
568: ##################################
569: # TRANSCENDENTAL "LAST-OP" FLAGS #
570: ##################################
571: set FMUL_OP, 0x0 # fmul instr performed last
572: set FDIV_OP, 0x1 # fdiv performed last
573: set FADD_OP, 0x2 # fadd performed last
574: set FMOV_OP, 0x3 # fmov performed last
575:
576: #############
577: # CONSTANTS #
578: #############
579: T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
580: T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
581:
582: PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
583: PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
584:
585: TWOBYPI:
586: long 0x3FE45F30,0x6DC9C883
587:
588: #########################################################################
589: # XDEF **************************************************************** #
590: # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
591: # #
592: # This handler should be the first code executed upon taking the #
593: # FP Overflow exception in an operating system. #
594: # #
595: # XREF **************************************************************** #
596: # _imem_read_long() - read instruction longword #
597: # fix_skewed_ops() - adjust src operand in fsave frame #
598: # set_tag_x() - determine optype of src/dst operands #
599: # store_fpreg() - store opclass 0 or 2 result to FP regfile #
600: # unnorm_fix() - change UNNORM operands to NORM or ZERO #
601: # load_fpn2() - load dst operand from FP regfile #
602: # fout() - emulate an opclass 3 instruction #
603: # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
604: # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
605: # _real_ovfl() - "callout" for Overflow exception enabled code #
606: # _real_inex() - "callout" for Inexact exception enabled code #
607: # _real_trace() - "callout" for Trace exception code #
608: # #
609: # INPUT *************************************************************** #
610: # - The system stack contains the FP Ovfl exception stack frame #
611: # - The fsave frame contains the source operand #
612: # #
613: # OUTPUT ************************************************************** #
614: # Overflow Exception enabled: #
615: # - The system stack is unchanged #
616: # - The fsave frame contains the adjusted src op for opclass 0,2 #
617: # Overflow Exception disabled: #
618: # - The system stack is unchanged #
619: # - The "exception present" flag in the fsave frame is cleared #
620: # #
621: # ALGORITHM *********************************************************** #
622: # On the 060, if an FP overflow is present as the result of any #
623: # instruction, the 060 will take an overflow exception whether the #
624: # exception is enabled or disabled in the FPCR. For the disabled case, #
625: # This handler emulates the instruction to determine what the correct #
626: # default result should be for the operation. This default result is #
627: # then stored in either the FP regfile, data regfile, or memory. #
628: # Finally, the handler exits through the "callout" _fpsp_done() #
629: # denoting that no exceptional conditions exist within the machine. #
630: # If the exception is enabled, then this handler must create the #
631: # exceptional operand and place it in the fsave state frame, and store #
632: # the default result (only if the instruction is opclass 3). For #
633: # exceptions enabled, this handler must exit through the "callout" #
634: # _real_ovfl() so that the operating system enabled overflow handler #
635: # can handle this case. #
636: # Two other conditions exist. First, if overflow was disabled #
637: # but the inexact exception was enabled, this handler must exit #
638: # through the "callout" _real_inex() regardless of whether the result #
639: # was inexact. #
640: # Also, in the case of an opclass three instruction where #
641: # overflow was disabled and the trace exception was enabled, this #
642: # handler must exit through the "callout" _real_trace(). #
643: # #
644: #########################################################################
645:
646: global _fpsp_ovfl
647: _fpsp_ovfl:
648:
649: #$# sub.l &24,%sp # make room for src/dst
650:
651: link.w %a6,&-LOCAL_SIZE # init stack frame
652:
653: fsave FP_SRC(%a6) # grab the "busy" frame
654:
655: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
656: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
657: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
658:
659: # the FPIAR holds the "current PC" of the faulting instruction
660: mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
661: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
662: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
663: bsr.l _imem_read_long # fetch the instruction words
664: mov.l %d0,EXC_OPWORD(%a6)
665:
666: ##############################################################################
667:
668: btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
669: bne.w fovfl_out
670:
671:
672: lea FP_SRC(%a6),%a0 # pass: ptr to src op
673: bsr.l fix_skewed_ops # fix src op
674:
675: # since, I believe, only NORMs and DENORMs can come through here,
676: # maybe we can avoid the subroutine call.
677: lea FP_SRC(%a6),%a0 # pass: ptr to src op
678: bsr.l set_tag_x # tag the operand type
679: mov.b %d0,STAG(%a6) # maybe NORM,DENORM
680:
681: # bit five of the fp extension word separates the monadic and dyadic operations
682: # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
683: # will never take this exception.
684: btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
685: beq.b fovfl_extract # monadic
686:
687: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
688: bsr.l load_fpn2 # load dst into FP_DST
689:
690: lea FP_DST(%a6),%a0 # pass: ptr to dst op
691: bsr.l set_tag_x # tag the operand type
692: cmpi.b %d0,&UNNORM # is operand an UNNORM?
693: bne.b fovfl_op2_done # no
694: bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
695: fovfl_op2_done:
696: mov.b %d0,DTAG(%a6) # save dst optype tag
697:
698: fovfl_extract:
699:
700: #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
701: #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
702: #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
703: #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
704: #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
705: #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
706:
707: clr.l %d0
708: mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
709:
710: mov.b 1+EXC_CMDREG(%a6),%d1
711: andi.w &0x007f,%d1 # extract extension
712:
713: andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
714:
715: fmov.l &0x0,%fpcr # zero current control regs
716: fmov.l &0x0,%fpsr
717:
718: lea FP_SRC(%a6),%a0
719: lea FP_DST(%a6),%a1
720:
721: # maybe we can make these entry points ONLY the OVFL entry points of each routine.
722: mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
723: jsr (tbl_unsupp.l,%pc,%d1.l*1)
724:
725: # the operation has been emulated. the result is in fp0.
726: # the EXOP, if an exception occurred, is in fp1.
727: # we must save the default result regardless of whether
728: # traps are enabled or disabled.
729: bfextu EXC_CMDREG(%a6){&6:&3},%d0
730: bsr.l store_fpreg
731:
732: # the exceptional possibilities we have left ourselves with are ONLY overflow
733: # and inexact. and, the inexact is such that overflow occurred and was disabled
734: # but inexact was enabled.
735: btst &ovfl_bit,FPCR_ENABLE(%a6)
736: bne.b fovfl_ovfl_on
737:
738: btst &inex2_bit,FPCR_ENABLE(%a6)
739: bne.b fovfl_inex_on
740:
741: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
742: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
743: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
744:
745: unlk %a6
746: #$# add.l &24,%sp
747: bra.l _fpsp_done
748:
749: # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
750: # in fp1. now, simply jump to _real_ovfl()!
751: fovfl_ovfl_on:
752: fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
753:
754: mov.w &0xe005,2+FP_SRC(%a6) # save exc status
755:
756: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
757: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
758: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
759:
760: frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
761:
762: unlk %a6
763:
764: bra.l _real_ovfl
765:
766: # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
767: # we must jump to real_inex().
768: fovfl_inex_on:
769:
770: fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
771:
772: mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
773: mov.w &0xe001,2+FP_SRC(%a6) # save exc status
774:
775: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
776: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
777: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
778:
779: frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
780:
781: unlk %a6
782:
783: bra.l _real_inex
784:
785: ########################################################################
786: fovfl_out:
787:
788:
789: #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
790: #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
791: #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
792:
793: # the src operand is definitely a NORM(!), so tag it as such
794: mov.b &NORM,STAG(%a6) # set src optype tag
795:
796: clr.l %d0
797: mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
798:
799: and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
800:
801: fmov.l &0x0,%fpcr # zero current control regs
802: fmov.l &0x0,%fpsr
803:
804: lea FP_SRC(%a6),%a0 # pass ptr to src operand
805:
806: bsr.l fout
807:
808: btst &ovfl_bit,FPCR_ENABLE(%a6)
809: bne.w fovfl_ovfl_on
810:
811: btst &inex2_bit,FPCR_ENABLE(%a6)
812: bne.w fovfl_inex_on
813:
814: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
815: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
816: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
817:
818: unlk %a6
819: #$# add.l &24,%sp
820:
821: btst &0x7,(%sp) # is trace on?
822: beq.l _fpsp_done # no
823:
824: fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
825: mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
826: bra.l _real_trace
827:
828: #########################################################################
829: # XDEF **************************************************************** #
830: # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
831: # #
832: # This handler should be the first code executed upon taking the #
833: # FP Underflow exception in an operating system. #
834: # #
835: # XREF **************************************************************** #
836: # _imem_read_long() - read instruction longword #
837: # fix_skewed_ops() - adjust src operand in fsave frame #
838: # set_tag_x() - determine optype of src/dst operands #
839: # store_fpreg() - store opclass 0 or 2 result to FP regfile #
840: # unnorm_fix() - change UNNORM operands to NORM or ZERO #
841: # load_fpn2() - load dst operand from FP regfile #
842: # fout() - emulate an opclass 3 instruction #
843: # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
844: # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
845: # _real_ovfl() - "callout" for Overflow exception enabled code #
846: # _real_inex() - "callout" for Inexact exception enabled code #
847: # _real_trace() - "callout" for Trace exception code #
848: # #
849: # INPUT *************************************************************** #
850: # - The system stack contains the FP Unfl exception stack frame #
851: # - The fsave frame contains the source operand #
852: # #
853: # OUTPUT ************************************************************** #
854: # Underflow Exception enabled: #
855: # - The system stack is unchanged #
856: # - The fsave frame contains the adjusted src op for opclass 0,2 #
857: # Underflow Exception disabled: #
858: # - The system stack is unchanged #
859: # - The "exception present" flag in the fsave frame is cleared #
860: # #
861: # ALGORITHM *********************************************************** #
862: # On the 060, if an FP underflow is present as the result of any #
863: # instruction, the 060 will take an underflow exception whether the #
864: # exception is enabled or disabled in the FPCR. For the disabled case, #
865: # This handler emulates the instruction to determine what the correct #
866: # default result should be for the operation. This default result is #
867: # then stored in either the FP regfile, data regfile, or memory. #
868: # Finally, the handler exits through the "callout" _fpsp_done() #
869: # denoting that no exceptional conditions exist within the machine. #
870: # If the exception is enabled, then this handler must create the #
871: # exceptional operand and place it in the fsave state frame, and store #
872: # the default result (only if the instruction is opclass 3). For #
873: # exceptions enabled, this handler must exit through the "callout" #
874: # _real_unfl() so that the operating system enabled overflow handler #
875: # can handle this case. #
876: # Two other conditions exist. First, if underflow was disabled #
877: # but the inexact exception was enabled and the result was inexact, #
878: # this handler must exit through the "callout" _real_inex(). #
879: # #
880: # Also, in the case of an opclass three instruction where #
881: # underflow was disabled and the trace exception was enabled, this #
882: # handler must exit through the "callout" _real_trace(). #
883: # #
884: #########################################################################
885:
886:	global		_fpsp_unfl
887: _fpsp_unfl:
888:
889: #$#	sub.l		&24,%sp			# make room for src/dst
890:
891:	link.w		%a6,&-LOCAL_SIZE	# init stack frame
892:
893:	fsave		FP_SRC(%a6)		# grab the "busy" frame
894:
895:	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
896:	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
897:	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
898:
899: # the FPIAR holds the "current PC" of the faulting instruction
900:	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
901:	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
902:	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
903:	bsr.l		_imem_read_long		# fetch the instruction words
904:	mov.l		%d0,EXC_OPWORD(%a6)	# store opword/extension word
905:
906: ##############################################################################
907:
908:	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
909:	bne.w		funfl_out
910:
911:
912:	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
913:	bsr.l		fix_skewed_ops		# fix src op
914:
915:	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
916:	bsr.l		set_tag_x		# tag the operand type
917:	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
918:
919: # bit five of the fp ext word separates the monadic and dyadic operations
920: # that can pass through fpsp_unfl(). remember that fcmp, and ftst
921: # will never take this exception.
922:	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
923:	beq.b		funfl_extract		# monadic
924:
925: # now, what's left that's not dyadic is fsincos. we can distinguish it
926: # from all dyadics by the '0110xxx pattern
927:	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
928:	bne.b		funfl_extract		# yes
929:
930:	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
931:	bsr.l		load_fpn2		# load dst into FP_DST
932:
933:	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
934:	bsr.l		set_tag_x		# tag the operand type
935:	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
936:	bne.b		funfl_op2_done		# no
937:	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
938: funfl_op2_done:
939:	mov.b		%d0,DTAG(%a6)		# save dst optype tag
940:
941: funfl_extract:
942:
943: #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
944: #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
945: #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
946: #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
947: #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
948: #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
949:
950:	clr.l		%d0
951:	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
952:
953:	mov.b		1+EXC_CMDREG(%a6),%d1
954:	andi.w		&0x007f,%d1		# extract extension
955:
956:	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but ccodes/aexc
957:
958:	fmov.l		&0x0,%fpcr		# zero current control regs
959:	fmov.l		&0x0,%fpsr
960:
961:	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
962:	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
963:
964: # maybe we can make these entry points ONLY the OVFL entry points of each routine.
965:	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
966:	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # emulate the instruction
967:
968:	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg field
969:	bsr.l		store_fpreg		# store result to FP regfile
970:
971: # The `060 FPU multiplier hardware is such that if the result of a
972: # multiply operation is the smallest possible normalized number
973: # (0x00000000_80000000_00000000), then the machine will take an
974: # underflow exception. Since this is incorrect, we need to check
975: # if our emulation, after re-doing the operation, decided that
976: # no underflow was called for. We do these checks only in
977: # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
978: # special case will simply exit gracefully with the correct result.
979:
980: # the exceptional possibilities we have left ourselves with are ONLY underflow
981: # and inexact. and, the inexact is such that underflow occurred and was disabled
982: # but inexact was enabled.
983:	btst		&unfl_bit,FPCR_ENABLE(%a6)
984:	bne.b		funfl_unfl_on
985:
986: funfl_chkinex:
987:	btst		&inex2_bit,FPCR_ENABLE(%a6)
988:	bne.b		funfl_inex_on
989:
990: funfl_exit:
991:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
992:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
993:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
994:
995:	unlk		%a6
996: #$#	add.l		&24,%sp
997:	bra.l		_fpsp_done
998:
999: # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
1000: # in fp1 (don't forget to save fp0). what to do now?
1001: # well, we simply have to go to _real_unfl()!
1002: funfl_unfl_on:
1003:
1004: # The `060 FPU multiplier hardware is such that if the result of a
1005: # multiply operation is the smallest possible normalized number
1006: # (0x00000000_80000000_00000000), then the machine will take an
1007: # underflow exception. Since this is incorrect, we check here to see
1008: # if our emulation, after re-doing the operation, decided that
1009: # no underflow was called for.
1010:	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1011:	beq.w		funfl_chkinex
1012:
1013: funfl_unfl_on2:
1014:	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1015:
1016:	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1017:
1018:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1019:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1020:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1021:
1022:	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1023:
1024:	unlk		%a6
1025:
1026:	bra.l		_real_unfl
1027:
1028: # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1029: # we must jump to real_inex().
1030: funfl_inex_on:
1031:
1032: # The `060 FPU multiplier hardware is such that if the result of a
1033: # multiply operation is the smallest possible normalized number
1034: # (0x00000000_80000000_00000000), then the machine will take an
1035: # underflow exception.
1036: # But, whether bogus or not, if inexact is enabled AND it occurred,
1037: # then we have to branch to real_inex.
1038:
1039:	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1040:	beq.w		funfl_exit
1041:
1042: funfl_inex_on2:
1043:
1044:	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1045:
1046:	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1047:	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1048:
1049:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1050:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1051:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1052:
1053:	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1054:
1055:	unlk		%a6
1056:
1057:	bra.l		_real_inex
1058:
1059: #######################################################################
1060: funfl_out:
1061:
1062:
1063: #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1064: #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1065: #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1066:
1067: # the src operand is definitely a NORM(!), so tag it as such
1068:	mov.b		&NORM,STAG(%a6)		# set src optype tag
1069:
1070:	clr.l		%d0
1071:	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1072:
1073:	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
1074:
1075:	fmov.l		&0x0,%fpcr		# zero current control regs
1076:	fmov.l		&0x0,%fpsr
1077:
1078:	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1079:
1080:	bsr.l		fout			# emulate the fmove out
1081:
1082:	btst		&unfl_bit,FPCR_ENABLE(%a6)
1083:	bne.w		funfl_unfl_on2
1084:
1085:	btst		&inex2_bit,FPCR_ENABLE(%a6)
1086:	bne.w		funfl_inex_on2
1087:
1088:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1089:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1090:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1091:
1092:	unlk		%a6
1093: #$#	add.l		&24,%sp
1094:
1095:	btst		&0x7,(%sp)		# is trace on?
1096:	beq.l		_fpsp_done		# no
1097:
1098:	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1099:	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1100:	bra.l		_real_trace
1101:
1102: #########################################################################
1103: # XDEF **************************************************************** #
1104: # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1105: # Data Type" exception. #
1106: # #
1107: # This handler should be the first code executed upon taking the #
1108: # FP Unimplemented Data Type exception in an operating system. #
1109: # #
1110: # XREF **************************************************************** #
1111: # _imem_read_{word,long}() - read instruction word/longword #
1112: # fix_skewed_ops() - adjust src operand in fsave frame #
1113: # set_tag_x() - determine optype of src/dst operands #
1114: # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1115: # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1116: # load_fpn2() - load dst operand from FP regfile #
1117: # load_fpn1() - load src operand from FP regfile #
1118: # fout() - emulate an opclass 3 instruction #
1119: # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
1120: # _real_inex() - "callout" to operating system inexact handler #
1121: # _fpsp_done() - "callout" for exit; work all done #
1122: # _real_trace() - "callout" for Trace enabled exception #
1123: # funimp_skew() - adjust fsave src ops to "incorrect" value #
1124: # _real_snan() - "callout" for SNAN exception #
1125: # _real_operr() - "callout" for OPERR exception #
1126: # _real_ovfl() - "callout" for OVFL exception #
1127: # _real_unfl() - "callout" for UNFL exception #
1128: # get_packed() - fetch packed operand from memory #
1129: # #
1130: # INPUT *************************************************************** #
1131: # - The system stack contains the "Unimp Data Type" stk frame #
1132: # - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
1133: # #
1134: # OUTPUT ************************************************************** #
1135: # If Inexact exception (opclass 3): #
1136: # - The system stack is changed to an Inexact exception stk frame #
1137: # If SNAN exception (opclass 3): #
1138: # - The system stack is changed to an SNAN exception stk frame #
1139: # If OPERR exception (opclass 3): #
1140: # - The system stack is changed to an OPERR exception stk frame #
1141: # If OVFL exception (opclass 3): #
1142: # - The system stack is changed to an OVFL exception stk frame #
1143: # If UNFL exception (opclass 3): #
1144: # - The system stack is changed to an UNFL exception stack frame #
1145: # If Trace exception enabled: #
1146: # - The system stack is changed to a Trace exception stack frame #
1147: # Else: (normal case) #
1148: # - Correct result has been stored as appropriate #
1149: # #
1150: # ALGORITHM *********************************************************** #
1151: # Two main instruction types can enter here: (1) DENORM or UNNORM #
1152: # unimplemented data types. These can be either opclass 0,2 or 3 #
1153: # instructions, and (2) PACKED unimplemented data format instructions #
1154: # also of opclasses 0,2, or 3. #
1155: # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1156: # operand from the fsave state frame and the dst operand (if dyadic) #
1157: # from the FP register file. The instruction is then emulated by #
1158: # choosing an emulation routine from a table of routines indexed by #
1159: # instruction type. Once the instruction has been emulated and result #
1160: # saved, then we check to see if any enabled exceptions resulted from #
1161: # instruction emulation. If none, then we exit through the "callout" #
1162: # _fpsp_done(). If there is an enabled FP exception, then we insert #
1163: # this exception into the FPU in the fsave state frame and then exit #
1164: # through _fpsp_done(). #
1165: # PACKED opclass 0 and 2 is similar in how the instruction is #
1166: # emulated and exceptions handled. The differences occur in how the #
1167: # handler loads the packed op (by calling get_packed() routine) and #
1168: # by the fact that a Trace exception could be pending for PACKED ops. #
1169: # If a Trace exception is pending, then the current exception stack #
1170: # frame is changed to a Trace exception stack frame and an exit is #
1171: # made through _real_trace(). #
1172: # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1173: # performed by calling the routine fout(). If no exception should occur #
1174: # as the result of emulation, then an exit either occurs through #
1175: # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1176: # (a Trace stack frame must be created here, too). If an FP exception #
1177: # should occur, then we must create an exception stack frame of that #
1178: # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1179: # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1180: # emulation is performed in a similar manner. #
1181: # #
1182: #########################################################################
1183:
1184: #
1185: # (1) DENORM and UNNORM (unimplemented) data types:
1186: #
1187: # post-instruction
1188: # *****************
1189: # * EA *
1190: # pre-instruction * *
1191: # ***************** *****************
1192: # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1193: # ***************** *****************
1194: # * Next * * Next *
1195: # * PC * * PC *
1196: # ***************** *****************
1197: # * SR * * SR *
1198: # ***************** *****************
1199: #
1200: # (2) PACKED format (unsupported) opclasses two and three:
1201: # *****************
1202: # * EA *
1203: # * *
1204: # *****************
1205: # * 0x2 * 0x0dc *
1206: # *****************
1207: # * Next *
1208: # * PC *
1209: # *****************
1210: # * SR *
1211: # *****************
1212: #
1213:	global		_fpsp_unsupp
1214: _fpsp_unsupp:
1215:
1216:	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1217:
1218:	fsave		FP_SRC(%a6)		# save fp state
1219:
1220:	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1221:	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1222:	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1223:
1224:	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1225:	bne.b		fu_s
1226: fu_u:
1227:	mov.l		%usp,%a0		# fetch user stack pointer
1228:	mov.l		%a0,EXC_A7(%a6)		# save on stack
1229:	bra.b		fu_cont
1230: # if the exception is an opclass zero or two unimplemented data type
1231: # exception, then the a7' calculated here is wrong since it doesn't
1232: # stack an ea. however, we don't need an a7' for this case anyways.
1233: fu_s:
1234:	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1235:	mov.l		%a0,EXC_A7(%a6)		# save on stack
1236:
1237: fu_cont:
1238:
1239: # the FPIAR holds the "current PC" of the faulting instruction
1240: # the FPIAR should be set correctly for ALL exceptions passing through
1241: # this point.
1242:	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1243:	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1244:	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1245:	bsr.l		_imem_read_long		# fetch the instruction words
1246:	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1247:
1248: ############################
1249:
1250:	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1251:
1252: # Separate opclass three (fpn-to-mem) ops since they have a different
1253: # stack frame and protocol.
1254:	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1255:	bne.w		fu_out			# yes
1256:
1257: # Separate packed opclass two instructions.
1258:	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
1259:	cmpi.b		%d0,&0x13		# opclass 2, packed format?
1260:	beq.w		fu_in_pack
1261:
1262:
1263: # I'm not sure at this point what FPSR bits are valid for this instruction.
1264: # so, since the emulation routines re-create them anyways, zero exception field
1265:	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1266:
1267:	fmov.l		&0x0,%fpcr		# zero current control regs
1268:	fmov.l		&0x0,%fpsr
1269:
1270: # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1271: # precision format if the src format was single or double and the
1272: # source data type was an INF, NAN, DENORM, or UNNORM
1273:	lea		FP_SRC(%a6),%a0		# pass ptr to input
1274:	bsr.l		fix_skewed_ops
1275:
1276: # we don't know whether the src operand or the dst operand (or both) is the
1277: # UNNORM or DENORM. call the function that tags the operand type. if the
1278: # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1279:	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1280:	bsr.l		set_tag_x		# tag the operand type
1281:	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1282:	bne.b		fu_op2			# no
1283:	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1284:
1285: fu_op2:
1286:	mov.b		%d0,STAG(%a6)		# save src optype tag
1287:
1288:	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1289:
1290: # bit five of the fp extension word separates the monadic and dyadic operations
1291: # at this point
1292:	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1293:	beq.b		fu_extract		# monadic
1294:	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1295:	beq.b		fu_extract		# yes, so it's monadic, too
1296:
1297:	bsr.l		load_fpn2		# load dst into FP_DST
1298:
1299:	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1300:	bsr.l		set_tag_x		# tag the operand type
1301:	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1302:	bne.b		fu_op2_done		# no
1303:	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1304: fu_op2_done:
1305:	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1306:
1307: fu_extract:
1308:	clr.l		%d0
1309:	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1310:
1311:	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1312:
1313:	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1314:	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
1315:
1316:	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1317:	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # emulate the instruction
1318:
1319: #
1320: # Exceptions in order of precedence:
1321: #	BSUN	: none
1322: #	SNAN	: all dyadic ops
1323: #	OPERR	: fsqrt(-NORM)
1324: #	OVFL	: all except ftst,fcmp
1325: #	UNFL	: all except ftst,fcmp
1326: #	DZ	: fdiv
1327: #	INEX2	: all except ftst,fcmp
1328: #	INEX1	: none (packed doesn't go through here)
1329: #
1330:
1331: # we determine the highest priority exception(if any) set by the
1332: # emulation routine that has also been enabled by the user.
1333:	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
1334:	bne.b		fu_in_ena		# some are enabled
1335:
1336: fu_in_cont:
1337: # fcmp and ftst do not store any result.
1338:	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1339:	andi.b		&0x38,%d0		# extract bits 3-5
1340:	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1341:	beq.b		fu_in_exit		# yes
1342:
1343:	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1344:	bsr.l		store_fpreg		# store the result
1345:
1346: fu_in_exit:
1347:
1348:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1349:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1350:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1351:
1352:	unlk		%a6
1353:
1354:	bra.l		_fpsp_done
1355:
1356: fu_in_ena:
1357:	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1358:	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1359:	bne.b		fu_in_exc		# there is at least one set
1360:
1361: #
1362: # No exceptions occurred that were also enabled. Now:
1363: #
1364: #	if (OVFL && ovfl_disabled && inexact_enabled) {
1365: #	    branch to _real_inex() (even if the result was exact!);
1366: #	} else {
1367: #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1368: #	    return;
1369: #	}
1370: #
1371:	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1372:	beq.b		fu_in_cont		# no
1373:
1374: fu_in_ovflchk:
1375:	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1376:	beq.b		fu_in_cont		# no
1377:	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1378:
1379: #
1380: # An exception occurred and that exception was enabled:
1381: #
1382: #	shift enabled exception field into lo byte of d0;
1383: #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1384: #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1385: #	    /*
1386: #	     * this is the case where we must call _real_inex() now or else
1387: #	     * there will be no other way to pass it the exceptional operand
1388: #	     */
1389: #	    call _real_inex();
1390: #	} else {
1391: #	    restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1392: #	}
1393: #
1394: fu_in_exc:
1395:	subi.l		&24,%d0			# fix offset to be 0-8
1396:	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1397:	bne.b		fu_in_exc_exit		# no
1398:
1399: # the enabled exception was inexact
1400:	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1401:	bne.w		fu_in_exc_unfl		# yes
1402:	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1403:	bne.w		fu_in_exc_ovfl		# yes
1404:
1405: # here, we insert the correct fsave status value into the fsave frame for the
1406: # corresponding exception. the operand in the fsave frame should be the original
1407: # src operand.
1408: fu_in_exc_exit:
1409:	mov.l		%d0,-(%sp)		# save d0
1410:	bsr.l		funimp_skew		# skew sgl or dbl inputs
1411:	mov.l		(%sp)+,%d0		# restore d0
1412:
1413:	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1414:
1415:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1416:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1417:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1418:
1419:	frestore	FP_SRC(%a6)		# restore src op
1420:
1421:	unlk		%a6
1422:
1423:	bra.l		_fpsp_done
1424:
1425: tbl_except:
1426:	short		0xe000,0xe006,0xe004,0xe005
1427:	short		0xe003,0xe002,0xe001,0xe001
1428:
1429: fu_in_exc_unfl:
1430:	mov.w		&0x4,%d0		# index of UNFL status
1431:	bra.b		fu_in_exc_exit
1432: fu_in_exc_ovfl:
1433:	mov.w		&0x03,%d0		# index of OVFL status
1434:	bra.b		fu_in_exc_exit
1435:
1436: # If the input operand to this operation was opclass two and a single
1437: # or double precision denorm, inf, or nan, the operand needs to be
1438: # "corrected" in order to have the proper equivalent extended precision
1439: # number.
1440:	global		fix_skewed_ops
1441: fix_skewed_ops:
1442:	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1443:	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1444:	beq.b		fso_sgl			# yes
1445:	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1446:	beq.b		fso_dbl			# yes
1447:	rts					# no
1448:
1449: fso_sgl:
1450:	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1451:	andi.w		&0x7fff,%d0		# strip sign
1452:	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1453:	beq.b		fso_sgl_dnrm_zero	# yes
1454:	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1455:	beq.b		fso_infnan		# yes
1456:	rts					# no
1457:
1458: fso_sgl_dnrm_zero:
1459:	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1460:	beq.b		fso_zero		# it's a skewed zero
1461: fso_sgl_dnrm:
1462: # here, we count on norm not to alter a0...
1463:	bsr.l		norm			# normalize mantissa
1464:	neg.w		%d0			# -shft amt
1465:	addi.w		&0x3f81,%d0		# adjust new exponent
1466:	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1467:	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1468:	rts
1469:
1470: fso_zero:
1471:	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1472:	rts
1473:
1474: fso_infnan:
1475:	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1476:	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1477:	rts
1478:
1479: fso_dbl:
1480:	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1481:	andi.w		&0x7fff,%d0		# strip sign
1482:	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1483:	beq.b		fso_dbl_dnrm_zero	# yes
1484:	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1485:	beq.b		fso_infnan		# yes
1486:	rts					# no
1487:
1488: fso_dbl_dnrm_zero:
1489:	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1490:	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1491:	tst.l		LOCAL_LO(%a0)		# is it a zero?
1492:	beq.b		fso_zero		# yes
1493: fso_dbl_dnrm:
1494: # here, we count on norm not to alter a0...
1495:	bsr.l		norm			# normalize mantissa
1496:	neg.w		%d0			# -shft amt
1497:	addi.w		&0x3c01,%d0		# adjust new exponent
1498:	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1499:	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1500:	rts
1501:
1502: #################################################################
1503:
1504: # fmove out took an unimplemented data type exception.
1505: # the src operand is in FP_SRC. Call _fout() to write out the result and
1506: # to determine which exceptions, if any, to take.
1507: fu_out:
1508:
1509: # Separate packed move outs from the UNNORM and DENORM move outs.
1510:	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1511:	cmpi.b		%d0,&0x3		# packed static k-factor?
1512:	beq.w		fu_out_pack
1513:	cmpi.b		%d0,&0x7		# packed dynamic k-factor?
1514:	beq.w		fu_out_pack
1515:
1516:
1517: # I'm not sure at this point what FPSR bits are valid for this instruction.
1518: # so, since the emulation routines re-create them anyways, zero exception field.
1519: # fmove out doesn't affect ccodes.
1520:	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1521:
1522:	fmov.l		&0x0,%fpcr		# zero current control regs
1523:	fmov.l		&0x0,%fpsr
1524:
1525: # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1526: # call here. just figure out what it is...
1527:	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1528:	andi.w		&0x7fff,%d0		# strip sign
1529:	beq.b		fu_out_denorm		# it's a DENORM
1530:
1531:	lea		FP_SRC(%a6),%a0
1532:	bsr.l		unnorm_fix		# yes; fix it
1533:
1534:	mov.b		%d0,STAG(%a6)		# save src optype tag
1535:
1536:	bra.b		fu_out_cont
1537: fu_out_denorm:
1538:	mov.b		&DENORM,STAG(%a6)	# set src optype tag
1539: fu_out_cont:
1540:
1541:	clr.l		%d0
1542:	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1543:
1544:	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1545:
1546:	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1547:	bsr.l		fout			# call fmove out routine
1548:
1549: # Exceptions in order of precedence:
1550: #	BSUN	: none
1551: #	SNAN	: none
1552: #	OPERR	: fmove.{b,w,l} out of large UNNORM
1553: #	OVFL	: fmove.{s,d}
1554: #	UNFL	: fmove.{s,d,x}
1555: #	DZ	: none
1556: #	INEX2	: all
1557: #	INEX1	: none (packed doesn't travel through here)
1558:
1559: # determine the highest priority exception(if any) set by the
1560: # emulation routine that has also been enabled by the user.
1561:	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1562:	bne.w		fu_out_ena		# some are enabled
1563:
1564: fu_out_done:
1565:
1566:	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1567:
1568: # on extended precision opclass three instructions using pre-decrement or
1569: # post-increment addressing mode, the address register is not updated. if the
1570: # address register was the stack pointer used from user mode, then let's update
1571: # it here. if it was used from supervisor mode, then we have to handle this
1572: # as a special case.
1573:	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1574:	bne.b		fu_out_done_s
1575:
1576:	mov.l		EXC_A7(%a6),%a0		# restore a7
1577:	mov.l		%a0,%usp
1578:
1579: fu_out_done_cont:
1580:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1581:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1582:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1583:
1584:	unlk		%a6
1585:
1586:	btst		&0x7,(%sp)		# is trace on?
1587:	bne.b		fu_out_trace		# yes
1588:
1589:	bra.l		_fpsp_done
1590:
1591: # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1592: # ("fmov.x fpm,-(a7)") if so,
1593: fu_out_done_s:
1594:	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1595:	bne.b		fu_out_done_cont
1596:
1597: # the extended precision result is still in fp0. but, we need to save it
1598: # somewhere on the stack until we can copy it to its final resting place.
1599: # here, we're counting on the top of the stack to be the old place-holders
1600: # for fp0/fp1 which have already been restored. that way, we can write
1601: # over those destinations with the shifted stack frame.
1602:	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1603:
1604:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1605:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1606:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1607:
1608:	mov.l		(%a6),%a6		# restore frame pointer
1609:
1610:	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1611:	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1612:
1613: # now, copy the result to the proper place on the stack
1614:	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1615:	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1616:	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1617:
1618:	add.l		&LOCAL_SIZE-0x8,%sp
1619:
1620:	btst		&0x7,(%sp)		# is trace on?
1621:	bne.b		fu_out_trace		# yes
1622:
1623:	bra.l		_fpsp_done
1624:
1625: fu_out_ena:
1626:	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1627:	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1628:	bne.b		fu_out_exc		# there is at least one set
1629:
1630: # no exceptions were set.
1631: # if a disabled overflow occurred and inexact was enabled but the result
1632: # was exact, then a branch to _real_inex() is made.
1633:	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1634:	beq.w		fu_out_done		# no
1635:
1636: fu_out_ovflchk:
1637:	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1638:	beq.w		fu_out_done		# no
1639:	bra.w		fu_inex			# yes
1640:
1641: #
1642: # The fp move out that took the "Unimplemented Data Type" exception was
1643: # being traced. Since the stack frames are similar, get the "current" PC
1644: # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1645: #
1646: #		 UNSUPP FRAME		   TRACE FRAME
1647: #		*****************	*****************
1648: #		*      EA	*	*    Current	*
1649: #		*		*	*      PC	*
1650: #		*****************	*****************
1651: #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1652: #		*****************	*****************
1653: #		*     Next	*	*     Next	*
1654: #		*      PC	*	*      PC	*
1655: #		*****************	*****************
1656: #		*      SR	*	*      SR	*
1657: #		*****************	*****************
1658: #
1659: fu_out_trace:
1660:	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1661:	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1662:	bra.l		_real_trace
1663:
1664: # an exception occurred and that exception was enabled.
1665: fu_out_exc:
1666:	subi.l		&24,%d0			# fix offset to be 0-8
1667:
1668: # we don't mess with the existing fsave frame. just re-insert it and
1669: # jump to the "_real_{}()" handler...
1670:	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1671:	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1672:
1673:	swbeg		&0x8
1674: tbl_fu_out:
1675:	short		tbl_fu_out - tbl_fu_out	# BSUN can't happen
1676:	short		tbl_fu_out - tbl_fu_out	# SNAN can't happen
1677:	short		fu_operr - tbl_fu_out	# OPERR
1678:	short		fu_ovfl - tbl_fu_out	# OVFL
1679:	short		fu_unfl - tbl_fu_out	# UNFL
1680:	short		tbl_fu_out - tbl_fu_out	# DZ can't happen
1681:	short		fu_inex - tbl_fu_out	# INEX2
1682:	short		tbl_fu_out - tbl_fu_out	# INEX1 won't make it here
1683:
1684: # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1685: # frestore it.
1686: fu_snan:
1687:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1688:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1689:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1690:
1691:	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1692:	mov.w		&0xe006,2+FP_SRC(%a6)	# SNAN exc status
1693:
1694:	frestore	FP_SRC(%a6)		# restore src op
1695:
1696:	unlk		%a6
1697:
1698:
1699:	bra.l		_real_snan
1700:
1701: fu_operr:
1702:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1703:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1704:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1705:
1706:	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1707:	mov.w		&0xe004,2+FP_SRC(%a6)	# OPERR exc status
1708:
1709:	frestore	FP_SRC(%a6)		# restore src op
1710:
1711:	unlk		%a6
1712:
1713:
1714:	bra.l		_real_operr
1715:
1716: fu_ovfl:
1717:	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1718:
1719:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1720:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1721:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1722:
1723:	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1724:	mov.w		&0xe005,2+FP_SRC(%a6)	# OVFL exc status
1725:
1726:	frestore	FP_SRC(%a6)		# restore EXOP
1727:
1728:	unlk		%a6
1729:
1730:	bra.l		_real_ovfl
1731:
1732: # underflow can happen for extended precision. extended precision opclass
1733: # three instruction exceptions don't update the stack pointer. so, if the
1734: # exception occurred from user mode, then simply update a7 and exit normally.
1735: # if the exception occurred from supervisor mode, check if the <ea> was -(a7).
1736: fu_unfl:
1737:	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1738:
1739:	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1740:	bne.w		fu_unfl_s
1741:
1742:	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1743:	mov.l		%a0,%usp		# to or not...
1744:
1745: fu_unfl_cont:
1746:	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1747:
1748:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1749:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1750:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1751:
1752:	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1753:	mov.w		&0xe003,2+FP_SRC(%a6)	# UNFL exc status
1754:
1755:	frestore	FP_SRC(%a6)		# restore EXOP
1756:
1757:	unlk		%a6
1758:
1759:	bra.l		_real_unfl
1760:
1761: fu_unfl_s:
1762:	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1763:	bne.b		fu_unfl_cont
1764:
1765: # the extended precision result is still in fp0. but, we need to save it
1766: # somewhere on the stack until we can copy it to its final resting place
1767: # (where the exc frame is currently). make sure it's not at the top of the
1768: # frame or it will get overwritten when the exc stack frame is shifted "down".
1769:	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1770:	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1771:
1772:	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1773:	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1774:	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1775:
1776:	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1777:	mov.w		&0xe003,2+FP_DST(%a6)	# UNFL exc status
1778:
1779:	frestore	FP_DST(%a6)		# restore EXOP
1780:
1781:	mov.l		(%a6),%a6		# restore frame pointer
1782:
1783:	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1784:	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1785:	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1786:
1787: # now, copy the result to the proper place on the stack
1788:	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1789:	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1790:	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1791:
1792:	add.l		&LOCAL_SIZE-0x8,%sp
1793:
1794:	bra.l		_real_unfl
1795:
1796: # fmove in and out enter here.
1797: fu_inex:
1798: fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1799: 
1800: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1801: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1802: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1803: 
1804: mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1805: mov.w &0xe001,2+FP_SRC(%a6) # set INEX exc status in fsave frame
1806: 
1807: frestore FP_SRC(%a6) # restore EXOP
1808: 
1809: unlk %a6
1810: 
1811: 
1812: bra.l _real_inex # exit through OS INEX "callout"
1813:
1814: #########################################################################
1815: #########################################################################
# fu_in_pack(): emulate an instruction whose source operand is in
# packed decimal format: fetch+convert the src, tag src (and dst if
# dyadic), then dispatch through tbl_unsupp to the emulation routine.
1816: fu_in_pack:
1817: 
1818: 
1819: # I'm not sure at this point what FPSR bits are valid for this instruction.
1820: # so, since the emulation routines re-create them anyways, zero exception field
1821: andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1822: 
1823: fmov.l &0x0,%fpcr # zero current control regs
1824: fmov.l &0x0,%fpsr
1825: 
1826: bsr.l get_packed # fetch packed src operand
1827: 
1828: lea FP_SRC(%a6),%a0 # pass ptr to src
1829: bsr.l set_tag_x # set src optype tag
1830: 
1831: mov.b %d0,STAG(%a6) # save src optype tag
1832: 
1833: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1834: 
1835: # bit five of the fp extension word separates the monadic and dyadic operations
1836: # at this point
1837: btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1838: beq.b fu_extract_p # monadic
1839: cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1840: beq.b fu_extract_p # yes, so it's monadic, too
1841: 
1842: bsr.l load_fpn2 # load dst into FP_DST
1843: 
1844: lea FP_DST(%a6),%a0 # pass: ptr to dst op
1845: bsr.l set_tag_x # tag the operand type
1846: cmpi.b %d0,&UNNORM # is operand an UNNORM?
1847: bne.b fu_op2_done_p # no
1848: bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1849: fu_op2_done_p:
1850: mov.b %d0,DTAG(%a6) # save dst optype tag
1851: 
1852: fu_extract_p:
1853: clr.l %d0
1854: mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1855: 
1856: bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1857: 
1858: lea FP_SRC(%a6),%a0 # pass: ptr to src op
1859: lea FP_DST(%a6),%a1 # pass: ptr to dst op
1860: 
1861: mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1862: jsr (tbl_unsupp.l,%pc,%d1.l*1) # call emulation routine
1863:
1864: #
1865: # Exceptions in order of precedence:
1866: # BSUN : none
1867: # SNAN : all dyadic ops
1868: # OPERR : fsqrt(-NORM)
1869: # OVFL : all except ftst,fcmp
1870: # UNFL : all except ftst,fcmp
1871: # DZ : fdiv
1872: # INEX2 : all except ftst,fcmp
1873: # INEX1 : all
1874: #
1875:
1876: # we determine the highest priority exception(if any) set by the
1877: # emulation routine that has also been enabled by the user.
1878: mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1879: bne.w fu_in_ena_p # some are enabled
1880: 
1881: fu_in_cont_p:
1882: # fcmp and ftst do not store any result.
1883: mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1884: andi.b &0x38,%d0 # extract bits 3-5
1885: cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1886: beq.b fu_in_exit_p # yes
1887: 
1888: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1889: bsr.l store_fpreg # store the result
1890: 
1891: fu_in_exit_p:
1892: 
1893: btst &0x5,EXC_SR(%a6) # user or supervisor?
1894: bne.w fu_in_exit_s_p # supervisor
1895: 
1896: mov.l EXC_A7(%a6),%a0 # update user a7
1897: mov.l %a0,%usp
1898: 
1899: fu_in_exit_cont_p:
1900: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1901: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1902: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1903: 
1904: unlk %a6 # unravel stack frame
1905: 
1906: btst &0x7,(%sp) # is trace on?
1907: bne.w fu_trace_p # yes
1908: 
1909: bra.l _fpsp_done # exit to os
1910: 
1911: # the exception occurred in supervisor mode. check to see if the
1912: # addressing mode was (a7)+. if so, we'll need to shift the
1913: # stack frame "up".
1914: fu_in_exit_s_p:
1915: btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1916: beq.b fu_in_exit_cont_p # no
1917: 
1918: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1919: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1920: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1921: 
1922: unlk %a6 # unravel stack frame
1923: 
1924: # shift the stack frame "up". we don't really care about the <ea> field.
1925: mov.l 0x4(%sp),0x10(%sp)
1926: mov.l 0x0(%sp),0xc(%sp)
1927: add.l &0xc,%sp # strip the 12 vacated bytes
1928: 
1929: btst &0x7,(%sp) # is trace on?
1930: bne.w fu_trace_p # yes
1931: 
1932: bra.l _fpsp_done # exit to os
1933:
1934: fu_in_ena_p:
1935: and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1936: bfffo %d0{&24:&8},%d0 # find highest priority exception
1937: bne.b fu_in_exc_p # at least one was set
1938: 
1939: #
1940: # No exceptions occurred that were also enabled. Now:
1941: #
1942: # if (OVFL && ovfl_disabled && inexact_enabled) {
1943: # branch to _real_inex() (even if the result was exact!);
1944: # } else {
1945: # save the result in the proper fp reg (unless the op is fcmp or ftst);
1946: # return;
1947: # }
1948: #
1949: btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1950: beq.w fu_in_cont_p # no
1951: 
1952: fu_in_ovflchk_p:
1953: btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1954: beq.w fu_in_cont_p # no
1955: bra.w fu_in_exc_ovfl_p # do _real_inex() now
1956:
1957: #
1958: # An exception occurred and that exception was enabled:
1959: #
1960: # shift enabled exception field into lo byte of d0;
1961: # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1962: # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1963: # /*
1964: # * this is the case where we must call _real_inex() now or else
1965: # * there will be no other way to pass it the exceptional operand
1966: # */
1967: # call _real_inex();
1968: # } else {
1969: # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1970: # }
1971: #
1972: fu_in_exc_p:
1973: subi.l &24,%d0 # fix offset to be 0-8
1974: cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1975: blt.b fu_in_exc_exit_p # no
1976: 
1977: # the enabled exception was inexact
1978: btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1979: bne.w fu_in_exc_unfl_p # yes
1980: btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1981: bne.w fu_in_exc_ovfl_p # yes
1982: 
1983: # here, we insert the correct fsave status value into the fsave frame for the
1984: # corresponding exception. the operand in the fsave frame should be the original
1985: # src operand.
1986: # as a reminder for future predicted pain and agony, we are passing in fsave the
1987: # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1988: # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1989: fu_in_exc_exit_p:
1990: btst &0x5,EXC_SR(%a6) # user or supervisor?
1991: bne.w fu_in_exc_exit_s_p # supervisor
1992: 
1993: mov.l EXC_A7(%a6),%a0 # update user a7
1994: mov.l %a0,%usp
1995: 
1996: fu_in_exc_exit_cont_p:
1997: mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # stuff fsave status word
1998: 
1999: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2000: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2001: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2002: 
2003: frestore FP_SRC(%a6) # restore src op
2004: 
2005: unlk %a6
2006: 
2007: btst &0x7,(%sp) # is trace enabled?
2008: bne.w fu_trace_p # yes
2009: 
2010: bra.l _fpsp_done
2011: 
# fsave status words, indexed by exception priority (d0):
# 0=BSUN 1=SNAN 2=OPERR 3=OVFL 4=UNFL 5=DZ 6,7=INEX
2012: tbl_except_p:
2013: short 0xe000,0xe006,0xe004,0xe005
2014: short 0xe003,0xe002,0xe001,0xe001
2015: 
2016: fu_in_exc_ovfl_p:
2017: mov.w &0x3,%d0 # select OVFL entry in tbl_except_p
2018: bra.w fu_in_exc_exit_p
2019: 
2020: fu_in_exc_unfl_p:
2021: mov.w &0x4,%d0 # select UNFL entry in tbl_except_p
2022: bra.w fu_in_exc_exit_p
2023: 
2024: fu_in_exc_exit_s_p:
2025: btst &mia7_bit,SPCOND_FLG(%a6) # was <ea> mode (a7)+?
2026: beq.b fu_in_exc_exit_cont_p # no; use common exit
2027: 
2028: mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # stuff fsave status word
2029: 
2030: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2031: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2032: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2033: 
2034: frestore FP_SRC(%a6) # restore src op
2035: 
2036: unlk %a6 # unravel stack frame
2037: 
2038: # shift stack frame "up". who cares about <ea> field.
2039: mov.l 0x4(%sp),0x10(%sp)
2040: mov.l 0x0(%sp),0xc(%sp)
2041: add.l &0xc,%sp # strip the 12 vacated bytes
2042: 
2043: btst &0x7,(%sp) # is trace on?
2044: bne.b fu_trace_p # yes
2045: 
2046: bra.l _fpsp_done # exit to os
2047:
2048: #
2049: # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2050: # exception was being traced. Make the "current" PC the FPIAR and put it in the
2051: # trace stack frame then jump to _real_trace().
2052: #
2053: # UNSUPP FRAME TRACE FRAME
2054: # ***************** *****************
2055: # * EA * * Current *
2056: # * * * PC *
2057: # ***************** *****************
2058: # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2059: # ***************** *****************
2060: # * Next * * Next *
2061: # * PC * * PC *
2062: # ***************** *****************
2063: # * SR * * SR *
2064: # ***************** *****************
2065: fu_trace_p:
2066: mov.w &0x2024,0x6(%sp) # stuff: format 0x2, vector offset 0x024
2067: fmov.l %fpiar,0x8(%sp) # "Current PC" of trace frame = FPIAR
2068: 
2069: bra.l _real_trace # exit through OS trace "callout"
2070:
2071: #########################################################
2072: #########################################################
# fu_out_pack(): emulate "fmove.p fpn,<ea>" (packed move out): load the
# src fp register, tag it, and call fout to do the store/conversion.
2073: fu_out_pack:
2074: 
2075: 
2076: # I'm not sure at this point what FPSR bits are valid for this instruction.
2077: # so, since the emulation routines re-create them anyways, zero exception field.
2078: # fmove out doesn't affect ccodes.
2079: and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2080: 
2081: fmov.l &0x0,%fpcr # zero current control regs
2082: fmov.l &0x0,%fpsr
2083: 
2084: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract src fp register number
2085: bsr.l load_fpn1 # load src operand into FP_SRC
2086: 
2087: # unlike other opclass 3, unimplemented data type exceptions, packed must be
2088: # able to detect all operand types.
2089: lea FP_SRC(%a6),%a0 # pass: ptr to src operand
2090: bsr.l set_tag_x # tag the operand type
2091: cmpi.b %d0,&UNNORM # is operand an UNNORM?
2092: bne.b fu_op2_p # no
2093: bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2094: 
2095: fu_op2_p:
2096: mov.b %d0,STAG(%a6) # save src optype tag
2097: 
2098: clr.l %d0
2099: mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2100: 
2101: lea FP_SRC(%a6),%a0 # pass ptr to src operand
2102: 
2103: mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2104: bsr.l fout # call fmove out routine
2105: 
2106: # Exceptions in order of precedence:
2107: # BSUN : no
2108: # SNAN : yes
2109: # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2110: # OVFL : no
2111: # UNFL : no
2112: # DZ : no
2113: # INEX2 : yes
2114: # INEX1 : no
2115: 
2116: # determine the highest priority exception(if any) set by the
2117: # emulation routine that has also been enabled by the user.
2118: mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2119: bne.w fu_out_ena_p # some are enabled
2120: 
2121: fu_out_exit_p:
2122: mov.l EXC_A6(%a6),(%a6) # restore a6
2123: 
2124: btst &0x5,EXC_SR(%a6) # user or supervisor?
2125: bne.b fu_out_exit_s_p # supervisor
2126: 
2127: mov.l EXC_A7(%a6),%a0 # update user a7
2128: mov.l %a0,%usp
2129: 
2130: fu_out_exit_cont_p:
2131: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2132: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2133: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2134: 
2135: unlk %a6 # unravel stack frame
2136: 
2137: btst &0x7,(%sp) # is trace on?
2138: bne.w fu_trace_p # yes
2139: 
2140: bra.l _fpsp_done # exit to os
2141:
2142: # the exception occurred in supervisor mode. check to see if the
2143: # addressing mode was -(a7). if so, we'll need to shift the
2144: # stack frame "down".
2145: fu_out_exit_s_p:
2146: btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2147: beq.b fu_out_exit_cont_p # no
2148: 
2149: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2150: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2151: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2152: 
2153: mov.l (%a6),%a6 # restore frame pointer
2154: 
# shift the exception stack frame "down" 12 bytes
2155: mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2156: mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2157: 
2158: # now, copy the result to the proper place on the stack
2159: mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2160: mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2161: mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2162: 
2163: add.l &LOCAL_SIZE-0x8,%sp # release locals (minus the shift)
2164: 
2165: btst &0x7,(%sp) # is trace on?
2166: bne.w fu_trace_p # yes
2167: 
2168: bra.l _fpsp_done # exit to os
2169:
2170: fu_out_ena_p:
2171: and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2172: bfffo %d0{&24:&8},%d0 # find highest priority exception
2173: beq.w fu_out_exit_p # none set; normal exit
2174: 
2175: mov.l EXC_A6(%a6),(%a6) # restore a6
2176: 
2177: # an exception occurred and that exception was enabled.
2178: # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2179: fu_out_exc_p:
2180: cmpi.b %d0,&0x1a # compare bfffo index w/ OPERR's
2181: bgt.w fu_inex_p2 # greater => INEX
2182: beq.w fu_operr_p # equal => OPERR; else fall into SNAN
2183:
2184: fu_snan_p:
2185: btst &0x5,EXC_SR(%a6) # user or supervisor mode?
2186: bne.b fu_snan_s_p # supervisor
2187: 
2188: mov.l EXC_A7(%a6),%a0 # update user a7
2189: mov.l %a0,%usp
2190: bra.w fu_snan # use common SNAN exit
2191: 
2192: fu_snan_s_p:
2193: cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2194: bne.w fu_snan # no; use common SNAN exit
2195: 
2196: # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2197: # the strategy is to move the exception frame "down" 12 bytes. then, we
2198: # can store the default result where the exception frame was.
2199: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2200: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2201: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2202: 
2203: mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2204: mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2205: 
2206: frestore FP_SRC(%a6) # restore src operand
2207: 
2208: mov.l (%a6),%a6 # restore frame pointer
2209: 
2210: mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2211: mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2212: mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2213: 
2214: # now, we copy the default result to its proper location
2215: mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2216: mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2217: mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2218: 
2219: add.l &LOCAL_SIZE-0x8,%sp # release locals (minus the shift)
2220: 
2221: 
2222: bra.l _real_snan # exit through OS SNAN "callout"
2223:
2224: fu_operr_p:
2225: btst &0x5,EXC_SR(%a6) # user or supervisor mode?
2226: bne.w fu_operr_p_s # supervisor
2227: 
2228: mov.l EXC_A7(%a6),%a0 # update user a7
2229: mov.l %a0,%usp
2230: bra.w fu_operr # use common OPERR exit
2231: 
2232: fu_operr_p_s:
2233: cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2234: bne.w fu_operr # no; use common OPERR exit
2235: 
2236: # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2237: # the strategy is to move the exception frame "down" 12 bytes. then, we
2238: # can store the default result where the exception frame was.
2239: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2240: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2241: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2242: 
2243: mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2244: mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2245: 
2246: frestore FP_SRC(%a6) # restore src operand
2247: 
2248: mov.l (%a6),%a6 # restore frame pointer
2249: 
2250: mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2251: mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2252: mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2253: 
2254: # now, we copy the default result to its proper location
2255: mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2256: mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2257: mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2258: 
2259: add.l &LOCAL_SIZE-0x8,%sp # release locals (minus the shift)
2260: 
2261: 
2262: bra.l _real_operr # exit through OS OPERR "callout"
2263:
2264: fu_inex_p2:
2265: btst &0x5,EXC_SR(%a6) # user or supervisor mode?
2266: bne.w fu_inex_s_p2 # supervisor
2267: 
2268: mov.l EXC_A7(%a6),%a0 # update user a7
2269: mov.l %a0,%usp
2270: bra.w fu_inex # use common INEX exit
2271: 
2272: fu_inex_s_p2:
2273: cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2274: bne.w fu_inex # no; use common INEX exit
2275: 
2276: # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2277: # the strategy is to move the exception frame "down" 12 bytes. then, we
2278: # can store the default result where the exception frame was.
2279: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2280: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2281: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2282: 
2283: mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2284: mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2285: 
2286: frestore FP_SRC(%a6) # restore src operand
2287: 
2288: mov.l (%a6),%a6 # restore frame pointer
2289: 
2290: mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2291: mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2292: mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2293: 
2294: # now, we copy the default result to its proper location
2295: mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2296: mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2297: mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2298: 
2299: add.l &LOCAL_SIZE-0x8,%sp # release locals (minus the shift)
2300: 
2301: 
2302: bra.l _real_inex # exit through OS INEX "callout"
2303:
2304: #########################################################################
2305:
2306: #
2307: # if we're stuffing a source operand back into an fsave frame then we
2308: # have to make sure that for single or double source operands that the
2309: # format stuffed is as weird as the hardware usually makes it.
2310: #
2311: global funimp_skew
2312: funimp_skew:
2313: bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2314: cmpi.b %d0,&0x1 # was src sgl?
2315: beq.b funimp_skew_sgl # yes
2316: cmpi.b %d0,&0x5 # was src dbl?
2317: beq.b funimp_skew_dbl # yes
2318: rts # not sgl/dbl; nothing to skew
2319: 
2320: funimp_skew_sgl:
2321: mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2322: andi.w &0x7fff,%d0 # strip sign
2323: beq.b funimp_skew_sgl_not # zero exponent; leave as-is
2324: cmpi.w %d0,&0x3f80 # within sgl DENORM range?
2325: bgt.b funimp_skew_sgl_not # no; leave as-is
2326: neg.w %d0 # make exponent negative
2327: addi.w &0x3f81,%d0 # find amt to shift
2328: mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2329: lsr.l %d0,%d1 # shift it
2330: bset &31,%d1 # set j-bit
2331: mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2332: andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2333: ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2334: funimp_skew_sgl_not:
2335: rts
2336: 
2337: funimp_skew_dbl:
2338: mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2339: andi.w &0x7fff,%d0 # strip sign
2340: beq.b funimp_skew_dbl_not # zero exponent; leave as-is
2341: cmpi.w %d0,&0x3c00 # within dbl DENORM range?
2342: bgt.b funimp_skew_dbl_not # no; leave as-is
2343: 
2344: tst.b FP_SRC_EX(%a6) # make "internal format"
2345: smi.b 0x2+FP_SRC(%a6) # save sign flag in internal fmt byte
2346: mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2347: clr.l %d0 # clear g,r,s
2348: lea FP_SRC(%a6),%a0 # pass ptr to src op
2349: mov.w &0x3c01,%d1 # pass denorm threshold
2350: bsr.l dnrm_lp # denorm it
2351: mov.w &0x3c00,%d0 # new exponent
2352: tst.b 0x2+FP_SRC(%a6) # is sign set?
2353: beq.b fss_dbl_denorm_done # no
2354: bset &15,%d0 # set sign
2355: fss_dbl_denorm_done:
2356: bset &0x7,FP_SRC_HI(%a6) # set j-bit
2357: mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2358: funimp_skew_dbl_not:
2359: rts
2360:
2361: #########################################################################
# _mem_write2(): write the 12 bytes at (%a0).
# User-mode exception: tail-branch to _dmem_write to store to user memory.
# Supervisor mode: just stash the bytes in FP_DST and return d1=0 (success).
2362: global _mem_write2
2363: _mem_write2:
2364: btst &0x5,EXC_SR(%a6) # supervisor mode exception?
2365: beq.l _dmem_write # no; write through to user memory
2366: mov.l 0x0(%a0),FP_DST_EX(%a6) # supervisor; capture operand instead
2367: mov.l 0x4(%a0),FP_DST_HI(%a6)
2368: mov.l 0x8(%a0),FP_DST_LO(%a6)
2369: clr.l %d1 # return success
2370: rts
2371:
2372: #########################################################################
2373: # XDEF **************************************************************** #
2374: # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2375: # effective address" exception. #
2376: # #
2377: # This handler should be the first code executed upon taking the #
2378: # FP Unimplemented Effective Address exception in an operating #
2379: # system. #
2380: # #
2381: # XREF **************************************************************** #
2382: # _imem_read_long() - read instruction longword #
2383: # fix_skewed_ops() - adjust src operand in fsave frame #
2384: # set_tag_x() - determine optype of src/dst operands #
2385: # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2386: # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2387: # load_fpn2() - load dst operand from FP regfile #
2388: # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2389: # decbin() - convert packed data to FP binary data #
2390: # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2391: # _real_access() - "callout" for access error exception #
2392: # _mem_read() - read extended immediate operand from memory #
2393: # _fpsp_done() - "callout" for exit; work all done #
2394: # _real_trace() - "callout" for Trace enabled exception #
2395: # fmovm_dynamic() - emulate dynamic fmovm instruction #
2396: # fmovm_ctrl() - emulate fmovm control instruction #
2397: # #
2398: # INPUT *************************************************************** #
2399: # - The system stack contains the "Unimplemented <ea>" stk frame #
2400: # #
2401: # OUTPUT ************************************************************** #
2402: # If access error: #
2403: # - The system stack is changed to an access error stack frame #
2404: # If FPU disabled: #
2405: # - The system stack is changed to an FPU disabled stack frame #
2406: # If Trace exception enabled: #
2407: # - The system stack is changed to a Trace exception stack frame #
2408: # Else: (normal case) #
2409: # - None (correct result has been stored as appropriate) #
2410: # #
2411: # ALGORITHM *********************************************************** #
2412: # This exception handles 3 types of operations: #
2413: # (1) FP Instructions using extended precision or packed immediate #
2414: # addressing mode. #
2415: # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2416: # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2417: # #
2418: # For immediate data operations, the data is read in w/ a #
2419: # _mem_read() "callout", converted to FP binary (if packed), and used #
2420: # as the source operand to the instruction specified by the instruction #
2421: # word. If no FP exception should be reported as a result of the #
2422: # emulation, then the result is stored to the destination register and #
2423: # the handler exits through _fpsp_done(). If an enabled exc has been #
2424: # signalled as a result of emulation, then an fsave state frame #
2425: # corresponding to the FP exception type must be entered into the 060 #
2426: # FPU before exiting. In either the enabled or disabled cases, we #
2427: # must also check if a Trace exception is pending, in which case, we #
2428: # must create a Trace exception stack frame from the current exception #
2429: # stack frame. If no Trace is pending, we simply exit through #
2430: # _fpsp_done(). #
2431: # For "fmovm.x", call the routine fmovm_dynamic() which will #
2432: # decode and emulate the instruction. No FP exceptions can be pending #
2433: # as a result of this operation emulation. A Trace exception can be #
2434: # pending, though, which means the current stack frame must be changed #
2435: # to a Trace stack frame and an exit made through _real_trace(). #
2436: # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2437: # was executed from supervisor mode, this handler must store the FP #
2438: # register file values to the system stack by itself since #
2439: # fmovm_dynamic() can't handle this. A normal exit is made through #
2440: # fpsp_done(). #
2441: # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2442: # Again, a Trace exception may be pending and an exit made through #
2443: # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2444: # #
2445: # Before any of the above is attempted, it must be checked to #
2446: # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2447: # before the "FPU disabled" exception, but the "FPU disabled" exception #
2448: # has higher priority, we check the disabled bit in the PCR. If set, #
2449: # then we must create an 8 word "FPU disabled" exception stack frame #
2450: # from the current 4 word exception stack frame. This includes #
2451: # reproducing the effective address of the instruction to put on the #
2452: # new stack frame. #
2453: # #
2454: # In the process of all emulation work, if a _mem_read() #
2455: # "callout" returns a failing result indicating an access error, then #
2456: # we must create an access error stack frame from the current stack #
2457: # frame. This information includes a faulting address and a fault- #
2458: # status-longword. These are created within this handler. #
2459: # #
2460: #########################################################################
2461:
2462: global _fpsp_effadd
2463: _fpsp_effadd:
2464:
2465: # This exception type takes priority over the "Line F Emulator"
2466: # exception. Therefore, the FPU could be disabled when entering here.
2467: # So, we must check to see if it's disabled and handle that case separately.
2468: mov.l %d0,-(%sp) # save d0
2469: movc %pcr,%d0 # load proc cr
2470: btst &0x1,%d0 # is FPU disabled?
2471: bne.w iea_disabled # yes
2472: mov.l (%sp)+,%d0 # restore d0
2473:
2474: link %a6,&-LOCAL_SIZE # init stack frame
2475:
2476: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2477: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2478: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2479:
2480: # PC of instruction that took the exception is the PC in the frame
2481: mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2482:
2483: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2484: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2485: bsr.l _imem_read_long # fetch the instruction words
2486: mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2487:
2488: #########################################################################
2489:
2490: tst.w %d0 # is operation fmovem?
2491: bmi.w iea_fmovm # yes
2492:
2493: #
2494: # here, we will have:
2495: # fabs fdabs fsabs facos fmod
2496: # fadd fdadd fsadd fasin frem
2497: # fcmp fatan fscale
2498: # fdiv fddiv fsdiv fatanh fsin
2499: # fint fcos fsincos
2500: # fintrz fcosh fsinh
2501: # fmove fdmove fsmove fetox ftan
2502: # fmul fdmul fsmul fetoxm1 ftanh
2503: # fneg fdneg fsneg fgetexp ftentox
2504: # fsgldiv fgetman ftwotox
2505: # fsglmul flog10
2506: # fsqrt flog2
2507: # fsub fdsub fssub flogn
2508: # ftst flognp1
2509: # which can all use f<op>.{x,p}
2510: # so, now it's immediate data extended precision AND PACKED FORMAT!
2511: #
2512: iea_op:
2513: andi.l &0x00ff00ff,USER_FPSR(%a6)
2514:
2515: btst &0xa,%d0 # is src fmt x or p?
2516: bne.b iea_op_pack # packed
2517:
2518:
2519: mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2520: lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2521: mov.l &0xc,%d0 # pass: 12 bytes
2522: bsr.l _imem_read # read extended immediate
2523:
2524: tst.l %d1 # did ifetch fail?
2525: bne.w iea_iacc # yes
2526:
2527: bra.b iea_op_setsrc
2528:
2529: iea_op_pack:
2530:
2531: mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2532: lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2533: mov.l &0xc,%d0 # pass: 12 bytes
2534: bsr.l _imem_read # read packed operand
2535:
2536: tst.l %d1 # did ifetch fail?
2537: bne.w iea_iacc # yes
2538:
2539: # The packed operand is an INF or a NAN if the exponent field is all ones.
2540: bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2541: cmpi.w %d0,&0x7fff # INF or NAN?
2542: beq.b iea_op_setsrc # operand is an INF or NAN
2543:
2544: # The packed operand is a zero if the mantissa is all zero, else it's
2545: # a normal packed op.
2546: mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2547: andi.b &0x0f,%d0 # clear all but last nybble
2548: bne.b iea_op_gp_not_spec # not a zero
2549: tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2550: bne.b iea_op_gp_not_spec # not a zero
2551: tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2552: beq.b iea_op_setsrc # operand is a ZERO
2553: iea_op_gp_not_spec:
2554: lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2555: bsr.l decbin # convert to extended
2556: fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2557:
2558: iea_op_setsrc:
2559: addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2560:
2561: # FP_SRC now holds the src operand.
2562: lea FP_SRC(%a6),%a0 # pass: ptr to src op
2563: bsr.l set_tag_x # tag the operand type
2564: mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2565: cmpi.b %d0,&UNNORM # is operand an UNNORM?
2566: bne.b iea_op_getdst # no
2567: bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2568: mov.b %d0,STAG(%a6) # set new optype tag
2569: iea_op_getdst:
2570: clr.b STORE_FLG(%a6) # clear "store result" boolean
2571:
2572: btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2573: beq.b iea_op_extract # monadic; no dst operand needed
2574: btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2575: bne.b iea_op_spec # yes; these need special handling
2576:
2577: iea_op_loaddst:
2578: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno from cmd word
2579: bsr.l load_fpn2 # load dst operand into FP_DST
2580:
2581: lea FP_DST(%a6),%a0 # pass: ptr to dst op
2582: bsr.l set_tag_x # tag the operand type
2583: mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2584: cmpi.b %d0,&UNNORM # is operand an UNNORM?
2585: bne.b iea_op_extract # no
2586: bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2587: mov.b %d0,DTAG(%a6) # set new optype tag
2588: bra.b iea_op_extract # go emulate the instruction
2589:
2590: # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2591: iea_op_spec:
2592: btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2593: beq.b iea_op_extract # yes; fsincos stores results normally
2594: # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2595: # store a result. then, only fcmp will branch back and pick up a dst operand.
2596: st STORE_FLG(%a6) # don't store a final result
2597: btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2598: beq.b iea_op_loaddst # yes; fcmp is dyadic, fetch dst
2599:
2600: iea_op_extract:
2601: clr.l %d0 # zero whole reg before byte load
2602: mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2603:
2604: mov.b 1+EXC_CMDREG(%a6),%d1 # fetch instr extension opcode
2605: andi.w &0x007f,%d1 # extract extension
2606:
2607: fmov.l &0x0,%fpcr # clear ctrl regs so emulation
2608: fmov.l &0x0,%fpsr # routine sees a clean FPU state
2609:
2610: lea FP_SRC(%a6),%a0 # pass: ptr to src operand
2611: lea FP_DST(%a6),%a1 # pass: ptr to dst operand
2612:
2613: mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2614: jsr (tbl_unsupp.l,%pc,%d1.l*1) # emulate the instruction
2615:
2616: #
2617: # Exceptions in order of precedence:
2618: # BSUN : none
2619: # SNAN : all operations
2620: # OPERR : all reg-reg or mem-reg operations that can normally operr
2621: # OVFL : same as OPERR
2622: # UNFL : same as OPERR
2623: # DZ : same as OPERR
2624: # INEX2 : same as OPERR
2625: # INEX1 : all packed immediate operations
2626: #
2627:
2628: # we determine the highest priority exception (if any) set by the
2629: # emulation routine that has also been enabled by the user.
2630: mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2631: bne.b iea_op_ena # some are enabled; find highest
2632:
2633: # now, we save the result, unless, of course, the operation was ftst or fcmp.
2634: # these don't save results.
2635: iea_op_save:
2636: tst.b STORE_FLG(%a6) # does this op store a result?
2637: bne.b iea_op_exit1 # exit with no frestore
2638:
2639: iea_op_store:
2640: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2641: bsr.l store_fpreg # store the result
2642:
2643: iea_op_exit1:
2644: mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2645: mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2646:
2647: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2648: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2649: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2650:
2651: unlk %a6 # unravel the frame
2652:
2653: btst &0x7,(%sp) # is trace on?
2654: bne.w iea_op_trace # yes; build a trace frame
2655:
2656: bra.l _fpsp_done # exit to os
2657:
2658: iea_op_ena:
2659: and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
2660: bfffo %d0{&24:&8},%d0 # find highest priority exception
2661: bne.b iea_op_exc # at least one was set
2662:
2663: # no exception occurred. now, did a disabled, exact overflow occur with inexact
2664: # enabled? if so, then we have to stuff an overflow frame into the FPU.
2665: btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2666: beq.b iea_op_save # no; just store result and exit
2667:
2668: iea_op_ovfl:
2669: btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2670: beq.b iea_op_store # no
2671: bra.b iea_op_exc_ovfl # yes
2672:
2673: # an enabled exception occurred. we have to insert the exception type back into
2674: # the machine.
2675: iea_op_exc:
2676: subi.l &24,%d0 # fix offset to be 0-8
2677: cmpi.b %d0,&0x6 # is exception INEX?
2678: bne.b iea_op_exc_force # no
2679:
2680: # the enabled exception was inexact. so, if it occurs with an overflow
2681: # or underflow that was disabled, then we have to force an overflow or
2682: # underflow frame.
2683: btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2684: bne.b iea_op_exc_ovfl # yes
2685: btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2686: bne.b iea_op_exc_unfl # yes
2687:
2688: iea_op_exc_force:
2689: mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # stuff frame type word
2690: bra.b iea_op_exit2 # exit with frestore
2691:
2692: tbl_iea_except:
2693: short 0xe002, 0xe006, 0xe004, 0xe005
2694: short 0xe003, 0xe002, 0xe001, 0xe001
2695:
2696: iea_op_exc_ovfl:
2697: mov.w &0xe005,2+FP_SRC(%a6) # force overflow frame type
2698: bra.b iea_op_exit2
2699:
2700: iea_op_exc_unfl:
2701: mov.w &0xe003,2+FP_SRC(%a6) # force underflow frame type
2702:
2703: iea_op_exit2:
2704: mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2705: mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2706:
2707: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2708: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2709: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2710:
2711: frestore FP_SRC(%a6) # restore exceptional state
2712:
2713: unlk %a6 # unravel the frame
2714:
2715: btst &0x7,(%sp) # is trace on?
2716: bne.b iea_op_trace # yes; build a trace frame
2717:
2718: bra.l _fpsp_done # exit to os
2719:
2720: #
2721: # The opclass two instruction that took an "Unimplemented Effective Address"
2722: # exception was being traced. Make the "current" PC the FPIAR and put it in
2723: # the trace stack frame then jump to _real_trace().
2724: #
2725: # UNIMP EA FRAME TRACE FRAME
2726: # ***************** *****************
2727: # * 0x0 * 0x0f0 * * Current *
2728: # ***************** * PC *
2729: # * Current * *****************
2730: # * PC * * 0x2 * 0x024 *
2731: # ***************** *****************
2732: # * SR * * Next *
2733: # ***************** * PC *
2734: # *****************
2735: # * SR *
2736: # *****************
2737: iea_op_trace:
2738: mov.l (%sp),-(%sp) # shift stack frame "down"
2739: mov.w 0x8(%sp),0x4(%sp) # finish shifting frame words
2740: mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2741: fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2742:
2743: bra.l _real_trace # exit thru OS trace handler
2744:
2745: #########################################################################
2746: iea_fmovm:
2747: btst &14,%d0 # ctrl or data reg
2748: beq.w iea_fmovm_ctrl # bit clear -> fmovm of ctrl regs
2749:
2750: iea_fmovm_data:
2751:
2752: btst &0x5,EXC_SR(%a6) # user or supervisor mode
2753: bne.b iea_fmovm_data_s # supervisor
2754:
2755: iea_fmovm_data_u:
2756: mov.l %usp,%a0 # fetch user stack pointer
2757: mov.l %a0,EXC_A7(%a6) # store current a7
2758: bsr.l fmovm_dynamic # do dynamic fmovm
2759: mov.l EXC_A7(%a6),%a0 # load possibly new a7
2760: mov.l %a0,%usp # update usp
2761: bra.w iea_fmovm_exit
2762:
2763: iea_fmovm_data_s:
2764: clr.b SPCOND_FLG(%a6) # clear special-case flag
2765: lea 0x2+EXC_VOFF(%a6),%a0 # supervisor a7 = just above exc frame
2766: mov.l %a0,EXC_A7(%a6) # pass as a7 for the emulation
2767: bsr.l fmovm_dynamic # do dynamic fmovm
2768:
2769: cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> -(a7)?
2770: beq.w iea_fmovm_data_predec
2771: cmpi.b SPCOND_FLG(%a6),&mia7_flg # was <ea> (a7)+?
2772: bne.w iea_fmovm_exit
2773:
2774: # right now, d0 = the size.
2775: # the data has been fetched from the supervisor stack, but we have not
2776: # incremented the stack pointer by the appropriate number of bytes.
2777: # do it here.
2778: iea_fmovm_data_postinc:
2779: btst &0x7,EXC_SR(%a6) # is trace on?
2780: bne.b iea_fmovm_data_pi_trace # yes; build trace frame instead
2781:
2782: mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # move SR up "size" bytes
2783: mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # stack "Next PC"
2784: mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2785:
2786: lea (EXC_SR,%a6,%d0),%a0 # addr of shifted frame
2787: mov.l %a0,EXC_SR(%a6) # save new sp (popped below)
2788:
2789: fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2790: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2791: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2792:
2793: unlk %a6 # unravel the frame
2794: mov.l (%sp)+,%sp # set sp to the shifted frame
2795: bra.l _fpsp_done
2796:
2797: iea_fmovm_data_pi_trace:
2798: mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # move SR into trace frame
2799: mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # stack "Next PC"
2800: mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2801: mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # stack "Current PC"
2802:
2803: lea (EXC_SR-0x4,%a6,%d0),%a0 # addr of new trace frame
2804: mov.l %a0,EXC_SR(%a6) # save new sp (popped below)
2805:
2806: fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2807: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2808: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2809:
2810: unlk %a6 # unravel the frame
2811: mov.l (%sp)+,%sp # set sp to the trace frame
2812: bra.l _real_trace
2813:
2814: # right now, d1 = size and d0 = the strg.
2815: iea_fmovm_data_predec:
2816: mov.b %d1,EXC_VOFF(%a6) # store strg
2817: mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2818:
2819: fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2820: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2821: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2822:
2823: mov.l (%a6),-(%sp) # make a copy of a6
2824: mov.l %d0,-(%sp) # save d0
2825: mov.l %d1,-(%sp) # save d1
2826: mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2827:
2828: clr.l %d0 # zero whole reg before byte load
2829: mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2830: neg.l %d0 # get negative of size
2831:
2832: btst &0x7,EXC_SR(%a6) # is trace enabled?
2833: beq.b iea_fmovm_data_p2 # no; build normal frame
2834:
2835: mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # move SR into trace frame
2836: mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # stack "Current PC"
2837: mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) # stack "Next PC"
2838: mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2839:
2840: pea (%a6,%d0) # create final sp
2841: bra.b iea_fmovm_data_p3
2842:
2843: iea_fmovm_data_p2:
2844: mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # move SR down "size" bytes
2845: mov.l (%sp)+,(EXC_PC,%a6,%d0) # stack "Next PC"
2846: mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2847:
2848: pea (0x4,%a6,%d0) # create final sp
2849:
2850: iea_fmovm_data_p3:
2851: clr.l %d1 # zero whole reg before byte load
2852: mov.b EXC_VOFF(%a6),%d1 # fetch strg (reg select byte)
2853:
2854: tst.b %d1 # msb set -> fp0 selected?
2855: bpl.b fm_1 # no
2856: fmovm.x &0x80,(0x4+0x8,%a6,%d0) # store fp0
2857: addi.l &0xc,%d0 # bump by 12 bytes (ext size)
2858: fm_1:
2859: lsl.b &0x1,%d1 # shift next select bit to msb
2860: bpl.b fm_2 # not set
2861: fmovm.x &0x40,(0x4+0x8,%a6,%d0) # store fp1
2862: addi.l &0xc,%d0 # bump by 12 bytes
2863: fm_2:
2864: lsl.b &0x1,%d1 # shift next select bit to msb
2865: bpl.b fm_3 # not set
2866: fmovm.x &0x20,(0x4+0x8,%a6,%d0) # store fp2
2867: addi.l &0xc,%d0 # bump by 12 bytes
2868: fm_3:
2869: lsl.b &0x1,%d1 # shift next select bit to msb
2870: bpl.b fm_4 # not set
2871: fmovm.x &0x10,(0x4+0x8,%a6,%d0) # store fp3
2872: addi.l &0xc,%d0 # bump by 12 bytes
2873: fm_4:
2874: lsl.b &0x1,%d1 # shift next select bit to msb
2875: bpl.b fm_5 # not set
2876: fmovm.x &0x08,(0x4+0x8,%a6,%d0) # store fp4
2877: addi.l &0xc,%d0 # bump by 12 bytes
2878: fm_5:
2879: lsl.b &0x1,%d1 # shift next select bit to msb
2880: bpl.b fm_6 # not set
2881: fmovm.x &0x04,(0x4+0x8,%a6,%d0) # store fp5
2882: addi.l &0xc,%d0 # bump by 12 bytes
2883: fm_6:
2884: lsl.b &0x1,%d1 # shift next select bit to msb
2885: bpl.b fm_7 # not set
2886: fmovm.x &0x02,(0x4+0x8,%a6,%d0) # store fp6
2887: addi.l &0xc,%d0 # bump by 12 bytes
2888: fm_7:
2889: lsl.b &0x1,%d1 # shift last select bit to msb
2890: bpl.b fm_end # not set
2891: fmovm.x &0x01,(0x4+0x8,%a6,%d0) # store fp7
2892: fm_end:
2893: mov.l 0x4(%sp),%d1 # restore d1
2894: mov.l 0x8(%sp),%d0 # restore d0
2895: mov.l 0xc(%sp),%a6 # restore a6
2896: mov.l (%sp)+,%sp # set final sp
2897:
2898: btst &0x7,(%sp) # is trace enabled?
2899: beq.l _fpsp_done # no; exit to os
2900: bra.l _real_trace # yes; exit thru trace handler
2901:
2902: #########################################################################
2903: iea_fmovm_ctrl:
2904:
2905: bsr.l fmovm_ctrl # load ctrl regs
2906:
2907: iea_fmovm_exit:
2908: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2909: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2910: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2911:
2912: btst &0x7,EXC_SR(%a6) # is trace on?
2913: bne.b iea_fmovm_trace # yes
2914:
2915: mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2916:
2917: unlk %a6 # unravel the frame
2918:
2919: bra.l _fpsp_done # exit to os
2920:
2921: #
2922: # The control reg instruction that took an "Unimplemented Effective Address"
2923: # exception was being traced. The "Current PC" for the trace frame is the
2924: # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2925: # After fixing the stack frame, jump to _real_trace().
2926: #
2927: # UNIMP EA FRAME TRACE FRAME
2928: # ***************** *****************
2929: # * 0x0 * 0x0f0 * * Current *
2930: # ***************** * PC *
2931: # * Current * *****************
2932: # * PC * * 0x2 * 0x024 *
2933: # ***************** *****************
2934: # * SR * * Next *
2935: # ***************** * PC *
2936: # *****************
2937: # * SR *
2938: # *****************
2939: # this ain't a pretty solution, but it works:
2940: # -restore a6 (not with unlk)
2941: # -shift stack frame down over where old a6 used to be
2942: # -add LOCAL_SIZE to stack pointer
2943: iea_fmovm_trace:
2944: mov.l (%a6),%a6 # restore frame pointer
2945: mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # shift SR down
2946: mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # "Next PC" slot <- stacked PC
2947: mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # "Current PC" slot <- Next PC
2948: mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2949: add.l &LOCAL_SIZE,%sp # clear stack frame
2950:
2951: bra.l _real_trace # exit thru OS trace handler
2952:
2953: #########################################################################
2954: # The FPU is disabled and so we should really have taken the "Line
2955: # F Emulator" exception. So, here we create an 8-word stack frame
2956: # from our 4-word stack frame. This means we must calculate the length
2957: # of the faulting instruction to get the "next PC". This is trivial for
2958: # immediate operands but requires some extra work for fmovm dynamic
2959: # which can use most addressing modes.
2960: iea_disabled:
2961: mov.l (%sp)+,%d0 # restore d0
2962:
2963: link %a6,&-LOCAL_SIZE # init stack frame
2964:
2965: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2966:
2967: # PC of instruction that took the exception is the PC in the frame
2968: mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) # start fetching at "Current PC"
2969: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2970: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2971: bsr.l _imem_read_long # fetch the instruction words
2972: mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2973:
2974: tst.w %d0 # is instr fmovm (bit 15 set)?
2975: bmi.b iea_dis_fmovm # yes
2976: # instruction is using an extended precision immediate operand. therefore,
2977: # the total instruction length is 16 bytes.
2978: iea_dis_immed:
2979: mov.l &0x10,%d0 # 16 bytes of instruction
2980: bra.b iea_dis_cont
2981: iea_dis_fmovm:
2982: btst &0xe,%d0 # is instr fmovm ctrl
2983: bne.b iea_dis_fmovm_data # no
2984: # the instruction is a fmovm.l with 2 or 3 registers.
2985: bfextu %d0{&19:&3},%d1 # extract ctrl reg select field
2986: mov.l &0xc,%d0 # assume 2 regs: 12 bytes
2987: cmpi.b %d1,&0x7 # move all regs?
2988: bne.b iea_dis_cont
2989: addq.l &0x4,%d0 # 3 regs: 16 bytes total
2990: bra.b iea_dis_cont
2991: # the instruction is an fmovm.x dynamic which can use many addressing
2992: # modes and thus can have several different total instruction lengths.
2993: # call fmovm_calc_ea which will go through the ea calc process and,
2994: # as a by-product, will tell us how long the instruction is.
2995: iea_dis_fmovm_data:
2996: clr.l %d0
2997: bsr.l fmovm_calc_ea # calc <ea>; advances EXC_EXTWPTR
2998: mov.l EXC_EXTWPTR(%a6),%d0 # ptr past end of instruction
2999: sub.l EXC_PC(%a6),%d0 # instr length = extwptr - pc
3000: iea_dis_cont:
3001: mov.w %d0,EXC_VOFF(%a6) # store stack shift value
3002:
3003: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3004:
3005: unlk %a6
3006:
3007: # here, we actually create the 8-word frame from the 4-word frame,
3008: # with the "next PC" as additional info.
3009: # the <ea> field is left as undefined.
3010: subq.l &0x8,%sp # make room for new stack
3011: mov.l %d0,-(%sp) # save d0
3012: mov.w 0xc(%sp),0x4(%sp) # move SR
3013: mov.l 0xe(%sp),0x6(%sp) # move Current PC
3014: clr.l %d0 # zero whole reg before word load
3015: mov.w 0x12(%sp),%d0 # fetch stored instr length
3016: mov.l 0x6(%sp),0x10(%sp) # move Current PC
3017: add.l %d0,0x6(%sp) # make Next PC
3018: mov.w &0x402c,0xa(%sp) # insert offset,frame format
3019: mov.l (%sp)+,%d0 # restore d0
3020:
3021: bra.l _real_fpu_disabled # exit thru OS Line-F handler
3022:
3023: ##########
3024:
3025: iea_iacc:
3026: movc %pcr,%d0 # fetch processor config reg
3027: btst &0x1,%d0 # FPU disabled (DFP bit)?
3028: bne.b iea_iacc_cont # yes; fp state was never saved
3029: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3030: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3031: iea_iacc_cont:
3032: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3033:
3034: unlk %a6
3035:
3036: subq.w &0x8,%sp # make stack frame bigger
3037: mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3038: mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3039: mov.w &0x4008,0x6(%sp) # store voff (access error frame)
3040: mov.l 0x2(%sp),0x8(%sp) # store ea
3041: mov.l &0x09428001,0xc(%sp) # store fslw
3042:
3043: iea_acc_done:
3044: btst &0x5,(%sp) # user or supervisor mode?
3045: beq.b iea_acc_done2 # user
3046: bset &0x2,0xd(%sp) # set supervisor TM bit
3047:
3048: iea_acc_done2:
3049: bra.l _real_access # exit thru OS access handler
3050:
3051: iea_dacc:
3052: lea -LOCAL_SIZE(%a6),%sp # point sp back at our locals
3053:
3054: movc %pcr,%d1 # fetch processor config reg
3055: btst &0x1,%d1 # FPU disabled (DFP bit)?
3056: bne.b iea_dacc_cont # yes; fp state was never saved
3057: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3058: fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3059: iea_dacc_cont:
3060: mov.l (%a6),%a6 # restore frame pointer
3061:
3062: mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # shift SR,hi(PC) down
3063: mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # shift lo(PC) down
3064: mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff (access error frame)
3065: mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea
3066: mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store fslw (hi word)
3067: mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store fslw (lo word)
3068:
3069: movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3070: add.w &LOCAL_SIZE-0x4,%sp # pop locals; sp -> new frame
3071:
3072: bra.b iea_acc_done # finish as with instr access
3073:
3074: #########################################################################
3075: # XDEF **************************************************************** #
3076: # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3077: # #
3078: # This handler should be the first code executed upon taking the #
3079: # FP Operand Error exception in an operating system. #
3080: # #
3081: # XREF **************************************************************** #
3082: # _imem_read_long() - read instruction longword #
3083: # fix_skewed_ops() - adjust src operand in fsave frame #
3084: # _real_operr() - "callout" to operating system operr handler #
3085: # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3086: # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3087: # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3088: # #
3089: # INPUT *************************************************************** #
3090: # - The system stack contains the FP Operr exception frame #
3091: # - The fsave frame contains the source operand #
3092: # #
3093: # OUTPUT ************************************************************** #
3094: # No access error: #
3095: # - The system stack is unchanged #
3096: # - The fsave frame contains the adjusted src op for opclass 0,2 #
3097: # #
3098: # ALGORITHM *********************************************************** #
3099: # In a system where the FP Operr exception is enabled, the goal #
3100: # is to get to the handler specified at _real_operr(). But, on the 060, #
3101: # for opclass zero and two instruction taking this exception, the #
3102: # input operand in the fsave frame may be incorrect for some cases #
3103: # and needs to be corrected. This handler calls fix_skewed_ops() to #
3104: # do just this and then exits through _real_operr(). #
3105: # For opclass 3 instructions, the 060 doesn't store the default #
3106: # operr result out to memory or data register file as it should. #
3107: # This code must emulate the move out before finally exiting through #
3108: # _real_operr(). The move out, if to memory, is performed using #
3109: # _mem_write() "callout" routines that may return a failing result. #
3110: # In this special case, the handler must exit through facc_out() #
3111: # which creates an access error stack frame from the current operr #
3112: # stack frame. #
3113: # #
3114: #########################################################################
3115:
3116: global _fpsp_operr
3117: _fpsp_operr:
3118:
3119: link.w %a6,&-LOCAL_SIZE # init stack frame
3120:
3121: fsave FP_SRC(%a6) # grab the "busy" frame
3122:
3123: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3124: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3125: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3126:
3127: # the FPIAR holds the "current PC" of the faulting instruction
3128: mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start fetching at faulting instr
3129:
3130: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3131: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3132: bsr.l _imem_read_long # fetch the instruction words
3133: mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3134:
3135: ##############################################################################
3136:
3137: btst &13,%d0 # is instr an fmove out?
3138: bne.b foperr_out # fmove out
3139:
3140:
3141: # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3142: # this would be the case for opclass two operations with a source infinity or
3143: # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3144: # cause an operr so we don't need to check for them here.
3145: lea FP_SRC(%a6),%a0 # pass: ptr to src op
3146: bsr.l fix_skewed_ops # fix src op
3147:
3148: foperr_exit:
3149: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3150: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3151: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3152:
3153: frestore FP_SRC(%a6) # restore (fixed) exc state
3154:
3155: unlk %a6
3156: bra.l _real_operr # exit thru OS operr handler
3157:
3158: ########################################################################
3159:
3160: #
3161: # the hardware does not save the default result to memory on enabled
3162: # operand error exceptions. we do this here before passing control to
3163: # the user operand error handler.
3164: #
3165: # byte, word, and long destination format operations can pass
3166: # through here. we simply need to test the sign of the src
3167: # operand and save the appropriate minimum or maximum integer value
3168: # to the effective address as pointed to by the stacked effective address.
3169: #
3170: # although packed opclass three operations can take operand error
3171: # exceptions, they won't pass through here since they are caught
3172: # first by the unsupported data format exception handler. that handler
3173: # sends them directly to _real_operr() if necessary.
3174: #
3175: foperr_out:
3176:
3177: mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3178: andi.w &0x7fff,%d1 # strip sign
3179: cmpi.w %d1,&0x7fff # max exponent -> inf or nan
3180: bne.b foperr_out_not_qnan
3181: # the operand is either an infinity or a QNAN.
3182: tst.l FP_SRC_LO(%a6) # lo mantissa bits set?
3183: bne.b foperr_out_qnan # yes; it's a nan
3184: mov.l FP_SRC_HI(%a6),%d1
3185: andi.l &0x7fffffff,%d1 # mask explicit integer bit
3186: beq.b foperr_out_not_qnan # mantissa clear -> infinity
3187: foperr_out_qnan:
3188: mov.l FP_SRC_HI(%a6),L_SCR1(%a6) # default result = hi mantissa
3189: bra.b foperr_out_jmp
3190:
3191: foperr_out_not_qnan:
3192: mov.l &0x7fffffff,%d1 # positive max int
3193: tst.b FP_SRC_EX(%a6) # is source negative?
3194: bpl.b foperr_out_not_qnan2 # no
3195: addq.l &0x1,%d1 # yes; 0x80000000 = min int
3196: foperr_out_not_qnan2:
3197: mov.l %d1,L_SCR1(%a6) # save default result
3198:
3199: foperr_out_jmp:
3200: bfextu %d0{&19:&3},%d0 # extract dst format field
3201: mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3202: mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 # fetch table offset
3203: jmp (tbl_operr.b,%pc,%a0) # jump to dst-size handler
3204:
3205: tbl_operr:
3206: short foperr_out_l - tbl_operr # long word integer
3207: short tbl_operr - tbl_operr # sgl prec shouldn't happen
3208: short tbl_operr - tbl_operr # ext prec shouldn't happen
3209: short foperr_exit - tbl_operr # packed won't enter here
3210: short foperr_out_w - tbl_operr # word integer
3211: short tbl_operr - tbl_operr # dbl prec shouldn't happen
3212: short foperr_out_b - tbl_operr # byte integer
3213: short tbl_operr - tbl_operr # packed won't enter here
3214:
3215: foperr_out_b:
3216: mov.b L_SCR1(%a6),%d0 # pass: default result byte
3217: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3218: ble.b foperr_out_b_save_dn # yes
3219: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3220: bsr.l _dmem_write_byte # write the default result
3221:
3222: tst.l %d1 # did dstore fail?
3223: bne.l facc_out_b # yes; make access error frame
3224:
3225: bra.w foperr_exit
3226: foperr_out_b_save_dn:
3227: andi.w &0x0007,%d1 # isolate reg number
3228: bsr.l store_dreg_b # store result to regfile
3229: bra.w foperr_exit
3230:
3231: foperr_out_w:
3232: mov.w L_SCR1(%a6),%d0 # pass: default result word
3233: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3234: ble.b foperr_out_w_save_dn # yes
3235: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3236: bsr.l _dmem_write_word # write the default result
3237:
3238: tst.l %d1 # did dstore fail?
3239: bne.l facc_out_w # yes; make access error frame
3240:
3241: bra.w foperr_exit
3242: foperr_out_w_save_dn:
3243: andi.w &0x0007,%d1 # isolate reg number
3244: bsr.l store_dreg_w # store result to regfile
3245: bra.w foperr_exit
3246:
3247: foperr_out_l:
3248: mov.l L_SCR1(%a6),%d0 # pass: default result long
3249: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3250: ble.b foperr_out_l_save_dn # yes
3251: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3252: bsr.l _dmem_write_long # write the default result
3253:
3254: tst.l %d1 # did dstore fail?
3255: bne.l facc_out_l # yes; make access error frame
3256:
3257: bra.w foperr_exit
3258: foperr_out_l_save_dn:
3259: andi.w &0x0007,%d1 # isolate reg number
3260: bsr.l store_dreg_l # store result to regfile
3261: bra.w foperr_exit
3262:
3263: #########################################################################
3264: # XDEF **************************************************************** #
3265: # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3266: # #
3267: # This handler should be the first code executed upon taking the #
3268: # FP Signalling NAN exception in an operating system. #
3269: # #
3270: # XREF **************************************************************** #
3271: # _imem_read_long() - read instruction longword #
3272: # fix_skewed_ops() - adjust src operand in fsave frame #
3273: # _real_snan() - "callout" to operating system SNAN handler #
3274: # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3275: # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3276: # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3277: # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3278: # #
3279: # INPUT *************************************************************** #
3280: # - The system stack contains the FP SNAN exception frame #
3281: # - The fsave frame contains the source operand #
3282: # #
3283: # OUTPUT ************************************************************** #
3284: # No access error: #
3285: # - The system stack is unchanged #
3286: # - The fsave frame contains the adjusted src op for opclass 0,2 #
3287: # #
3288: # ALGORITHM *********************************************************** #
3289: # In a system where the FP SNAN exception is enabled, the goal #
3290: # is to get to the handler specified at _real_snan(). But, on the 060, #
3291: # for opclass zero and two instructions taking this exception, the #
3292: # input operand in the fsave frame may be incorrect for some cases #
3293: # and needs to be corrected. This handler calls fix_skewed_ops() to #
3294: # do just this and then exits through _real_snan(). #
3295: # For opclass 3 instructions, the 060 doesn't store the default #
3296: # SNAN result out to memory or data register file as it should. #
3297: # This code must emulate the move out before finally exiting through #
3298: # _real_snan(). The move out, if to memory, is performed using #
3299: # _mem_write() "callout" routines that may return a failing result. #
3300: # In this special case, the handler must exit through facc_out() #
3301: # which creates an access error stack frame from the current SNAN #
3302: # stack frame. #
3303: # For the case of an extended precision opclass 3 instruction, #
3304: # if the effective addressing mode was -() or ()+, then the address #
3305: # register must get updated by calling _calc_ea_fout(). If the <ea> #
3306: # was -(a7) from supervisor mode, then the exception frame currently #
3307: # on the system stack must be carefully moved "down" to make room #
3308: # for the operand being moved. #
3309: # #
3310: #########################################################################
3311:
3312: global _fpsp_snan
3313: _fpsp_snan:
3314:
3315: link.w %a6,&-LOCAL_SIZE # init stack frame
3316:
3317: fsave FP_SRC(%a6) # grab the "busy" frame
3318:
3319: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3320: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3321: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3322:
3323: # the FPIAR holds the "current PC" of the faulting instruction
3324: mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start fetching at faulting instr
3325:
3326: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3327: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3328: bsr.l _imem_read_long # fetch the instruction words
3329: mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3330:
3331: ##############################################################################
3332:
3333: btst &13,%d0 # is instr an fmove out?
3334: bne.w fsnan_out # fmove out
3335:
3336:
3337: # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3338: # this would be the case for opclass two operations with a source infinity or
3339: # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3340: # fixed here.
3341: lea FP_SRC(%a6),%a0 # pass: ptr to src op
3342: bsr.l fix_skewed_ops # fix src op
3343:
3344: fsnan_exit:
3345: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3346: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3347: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3348:
3349: frestore FP_SRC(%a6) # restore (fixed) exc state
3350:
3351: unlk %a6
3352: bra.l _real_snan # exit thru OS SNAN handler
3353:
3354: ########################################################################
3355:
3356: #
3357: # the hardware does not save the default result to memory on enabled
3358: # snan exceptions. we do this here before passing control to
3359: # the user snan handler.
3360: #
3361: # byte, word, long, and packed destination format operations can pass
3362: # through here. since packed format operations already were handled by
3363: # fpsp_unsupp(), then we need to do nothing else for them here.
3364: # for byte, word, and long, we simply need to test the sign of the src
3365: # operand and save the appropriate minimum or maximum integer value
3366: # to the effective address as pointed to by the stacked effective address.
3367: #
3368: fsnan_out:
3369:
3370: bfextu %d0{&19:&3},%d0 # extract dst format field
3371: mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3372: mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch table offset
3373: jmp (tbl_snan.b,%pc,%a0) # jump to dst-format handler
3374:
3375: tbl_snan:
3376: short fsnan_out_l - tbl_snan # long word integer
3377: short fsnan_out_s - tbl_snan # sgl prec
3378: short fsnan_out_x - tbl_snan # ext prec
3379: short tbl_snan - tbl_snan # packed needs no help
3380: short fsnan_out_w - tbl_snan # word integer
3381: short fsnan_out_d - tbl_snan # dbl prec
3382: short fsnan_out_b - tbl_snan # byte integer
3383: short tbl_snan - tbl_snan # packed needs no help
3384:
3385: fsnan_out_b:
3386: mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3387: bset &6,%d0 # set SNAN bit (quiet the nan)
3388: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3389: ble.b fsnan_out_b_dn # yes
3390: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3391: bsr.l _dmem_write_byte # write the default result
3392:
3393: tst.l %d1 # did dstore fail?
3394: bne.l facc_out_b # yes; make access error frame
3395:
3396: bra.w fsnan_exit
3397: fsnan_out_b_dn:
3398: andi.w &0x0007,%d1 # isolate reg number
3399: bsr.l store_dreg_b # store result to regfile
3400: bra.w fsnan_exit
3401:
3402: fsnan_out_w:
3403: mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3404: bset &14,%d0 # set SNAN bit (quiet the nan)
3405: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3406: ble.b fsnan_out_w_dn # yes
3407: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3408: bsr.l _dmem_write_word # write the default result
3409:
3410: tst.l %d1 # did dstore fail?
3411: bne.l facc_out_w # yes; make access error frame
3412:
3413: bra.w fsnan_exit
3414: fsnan_out_w_dn:
3415: andi.w &0x0007,%d1 # isolate reg number
3416: bsr.l store_dreg_w # store result to regfile
3417: bra.w fsnan_exit
3418:
3419: fsnan_out_l:
3420: mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3421: bset &30,%d0 # set SNAN bit (quiet the nan)
3422: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3423: ble.b fsnan_out_l_dn # yes
3424: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3425: bsr.l _dmem_write_long # write the default result
3426:
3427: tst.l %d1 # did dstore fail?
3428: bne.l facc_out_l # yes; make access error frame
3429:
3430: bra.w fsnan_exit
3431: fsnan_out_l_dn:
3432: andi.w &0x0007,%d1 # isolate reg number
3433: bsr.l store_dreg_l # store result to regfile
3434: bra.w fsnan_exit
3435:
3436: fsnan_out_s:
3437: cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3438: ble.b fsnan_out_d_dn # yes
3439: mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3440: andi.l &0x80000000,%d0 # keep sign
3441: ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3442: mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3443: lsr.l &0x8,%d1 # shift mantissa for sgl
3444: or.l %d1,%d0 # create sgl SNAN
3445: mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3446: bsr.l _dmem_write_long # write the default result
3447:
3448: tst.l %d1 # did dstore fail?
3449: bne.l facc_out_l # yes; make access error frame
3450:
3451: bra.w fsnan_exit
3452: fsnan_out_d_dn:
3453: mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3454: andi.l &0x80000000,%d0 # keep sign
3455: ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3456: mov.l %d1,-(%sp) # save <ea> mode,reg byte
3457: mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3458: lsr.l &0x8,%d1 # shift mantissa for sgl
3459: or.l %d1,%d0 # create sgl SNAN
3460: mov.l (%sp)+,%d1 # restore <ea> mode,reg
3461: andi.w &0x0007,%d1 # isolate reg number
3462: bsr.l store_dreg_l # store result to regfile
3463: bra.w fsnan_exit
3464:
3465: fsnan_out_d:
3466: mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3467: andi.l &0x80000000,%d0 # keep sign
3468: ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3469: mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3470: mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3471: mov.l &11,%d0 # load shift amt
3472: lsr.l %d0,%d1 # align hi mantissa for dbl
3473: or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3474: mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3475: andi.l &0x000007ff,%d1 # keep bits shifted out above
3476: ror.l %d0,%d1 # rotate them to the top
3477: mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3478: mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3479: lsr.l %d0,%d1 # align lo mantissa for dbl
3480: or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3481: lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3482: mov.l EXC_EA(%a6),%a1 # pass: dst addr
3483: movq.l &0x8,%d0 # pass: size of 8 bytes
3484: bsr.l _dmem_write # write the default result
3485:
3486: tst.l %d1 # did dstore fail?
3487: bne.l facc_out_d # yes; make access error frame
3488:
3489: bra.w fsnan_exit
3490:
3491: # for extended precision, if the addressing mode is pre-decrement or
3492: # post-increment, then the address register did not get updated.
3493: # in addition, for pre-decrement, the stacked <ea> is incorrect.
3494: fsnan_out_x:
3495: clr.b SPCOND_FLG(%a6) # clear special case flag
3496:
# build the quieted extended-precision SNAN in FP_SCR0:
# sign/exponent word, zeroed pad word, hi mantissa w/ SNAN bit set, lo mantissa
3497: mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3498: clr.w 2+FP_SCR0(%a6)
3499: mov.l FP_SRC_HI(%a6),%d0
3500: bset &30,%d0 # set SNAN bit (quiets the NAN)
3501: mov.l %d0,FP_SCR0_HI(%a6)
3502: mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3503:
3504: btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3505: bne.b fsnan_out_x_s # yes
3506:
# user mode: expose the user a7 and the stacked a6 to _calc_ea_fout so it
# can update whichever An the addressing mode names, then restore both.
3507: mov.l %usp,%a0 # fetch user stack pointer
3508: mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3509: mov.l (%a6),EXC_A6(%a6)
3510:
3511: bsr.l _calc_ea_fout # find the correct ea,update An
3512: mov.l %a0,%a1
3513: mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3514:
3515: mov.l EXC_A7(%a6),%a0
3516: mov.l %a0,%usp # restore user stack pointer
3517: mov.l EXC_A6(%a6),(%a6)
3518:
3519: fsnan_out_x_save:
3520: lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3521: movq.l &0xc,%d0 # pass: size of extended
3522: bsr.l _dmem_write # write the default result
3523:
3524: tst.l %d1 # did dstore fail?
3525: bne.l facc_out_x # yes; take access-error exit
3526:
3527: bra.w fsnan_exit
3528:
# supervisor mode: same <ea> fixup, but -(a7) needs special handling since
# the destination overlaps the exception stack frame itself.
3529: fsnan_out_x_s:
3530: mov.l (%a6),EXC_A6(%a6)
3531:
3532: bsr.l _calc_ea_fout # find the correct ea,update An
3533: mov.l %a0,%a1
3534: mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3535:
3536: mov.l EXC_A6(%a6),(%a6)
3537:
3538: cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3539: bne.b fsnan_out_x_save # no
3540:
3541: # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# we can't _dmem_write over our own frame; instead, restore everything,
# slide the exception frame (SR/PC/EA) down 12 bytes and deposit the
# 12-byte extended result in the hole where -(a7) would have put it.
3542: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3543: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3544: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3545:
3546: frestore FP_SRC(%a6)
3547:
3548: mov.l EXC_A6(%a6),%a6 # restore frame pointer
3549:
# copy the stacked SR/PC/EA 0xc bytes lower (frame moves down by 12)
3550: mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3551: mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3552: mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3553:
# drop the 12-byte default result into the vacated slots
3554: mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3555: mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3556: mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3557:
# pop the locals but leave 8 bytes (net -12 + 4 adjust) so %sp lands on
# the slid-down frame; NOTE(review): offsets depend on LOCAL_SIZE layout
3558: add.l &LOCAL_SIZE-0x8,%sp
3559:
3560: bra.l _real_snan
3561:
3562: #########################################################################
3563: # XDEF **************************************************************** #
3564: # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3565: # #
3566: # This handler should be the first code executed upon taking the #
3567: # FP Inexact exception in an operating system. #
3568: # #
3569: # XREF **************************************************************** #
3570: # _imem_read_long() - read instruction longword #
3571: # fix_skewed_ops() - adjust src operand in fsave frame #
3572: # set_tag_x() - determine optype of src/dst operands #
3573: # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3574: # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3575: # load_fpn2() - load dst operand from FP regfile #
3576: # smovcr() - emulate an "fmovcr" instruction #
3577: # fout() - emulate an opclass 3 instruction #
3578: # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3579: # _real_inex() - "callout" to operating system inexact handler #
3580: # #
3581: # INPUT *************************************************************** #
3582: # - The system stack contains the FP Inexact exception frame #
3583: # - The fsave frame contains the source operand #
3584: # #
3585: # OUTPUT ************************************************************** #
3586: # - The system stack is unchanged #
3587: # - The fsave frame contains the adjusted src op for opclass 0,2 #
3588: # #
3589: # ALGORITHM *********************************************************** #
3590: # In a system where the FP Inexact exception is enabled, the goal #
3591: # is to get to the handler specified at _real_inex(). But, on the 060, #
3592: # for opclass zero and two instruction taking this exception, the #
3593: # hardware doesn't store the correct result to the destination FP #
3594: # register as did the '040 and '881/2. This handler must emulate the #
3595: # instruction in order to get this value and then store it to the #
3596: # correct register before calling _real_inex(). #
3597: # For opclass 3 instructions, the 060 doesn't store the default #
3598: # inexact result out to memory or data register file as it should. #
3599: # This code must emulate the move out by calling fout() before finally #
3600: # exiting through _real_inex(). #
3601: # #
3602: #########################################################################
3603:
3604: global _fpsp_inex
3605: _fpsp_inex:
3606:
3607: link.w %a6,&-LOCAL_SIZE # init stack frame
3608:
3609: fsave FP_SRC(%a6) # grab the "busy" frame
3610:
3611: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3612: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3613: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3614:
3615: # the FPIAR holds the "current PC" of the faulting instruction
3616: mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3617:
3618: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3619: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3620: bsr.l _imem_read_long # fetch the instruction words
3621: mov.l %d0,EXC_OPWORD(%a6)
3622:
3623: ##############################################################################
3624:
3625: btst &13,%d0 # is instr an fmove out?
3626: bne.w finex_out # fmove out
3627:
3628:
3629: # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3630: # longword integer directly into the upper longword of the mantissa along
3631: # w/ an exponent value of 0x401e. we convert this to extended precision here.
3632: bfextu %d0{&19:&3},%d0 # fetch instr size
3633: bne.b finex_cont # instr size is not long
3634: cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3635: bne.b finex_cont # no
3636: fmov.l &0x0,%fpcr # RN mode; exact int->ext conversion
3637: fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3638: fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3639: mov.w &0xe001,0x2+FP_SRC(%a6)
3640:
3641: finex_cont:
3642: lea FP_SRC(%a6),%a0 # pass: ptr to src op
3643: bsr.l fix_skewed_ops # fix src op
3644:
3645: # Here, we zero the ccode and exception byte field since we're going to
3646: # emulate the whole instruction. Notice, though, that we don't kill the
3647: # INEX1 bit. This is because a packed op has long since been converted
3648: # to extended before arriving here. Therefore, we need to retain the
3649: # INEX1 bit from when the operand was first converted.
3650: andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3651:
3652: fmov.l &0x0,%fpcr # zero current control regs
3653: fmov.l &0x0,%fpsr
3654:
3655: bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3656: cmpi.b %d1,&0x17 # is op an fmovecr?
3657: beq.w finex_fmovcr # yes
3658:
3659: lea FP_SRC(%a6),%a0 # pass: ptr to src op
3660: bsr.l set_tag_x # tag the operand type
3661: mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3662:
3663: # bits four and five of the fp extension word separate the monadic and dyadic
3664: # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3665: # will never take this exception, but fsincos will.
3666: btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3667: beq.b finex_extract # monadic
3668:
3669: btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3670: bne.b finex_extract # yes
3671:
3672: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3673: bsr.l load_fpn2 # load dst into FP_DST
3674:
3675: lea FP_DST(%a6),%a0 # pass: ptr to dst op
3676: bsr.l set_tag_x # tag the operand type
3677: cmpi.b %d0,&UNNORM # is operand an UNNORM?
3678: bne.b finex_op2_done # no
3679: bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3680: finex_op2_done:
3681: mov.b %d0,DTAG(%a6) # save dst optype tag
3682:
# dispatch to the emulation routine selected by the 7-bit extension field
3683: finex_extract:
3684: clr.l %d0
3685: mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3686:
3687: mov.b 1+EXC_CMDREG(%a6),%d1
3688: andi.w &0x007f,%d1 # extract extension
3689:
3690: lea FP_SRC(%a6),%a0
3691: lea FP_DST(%a6),%a1
3692:
3693: mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3694: jsr (tbl_unsupp.l,%pc,%d1.l*1)
3695:
3696: # the operation has been emulated. the result is in fp0.
3697: finex_save:
3698: bfextu EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst FP reg number
3699: bsr.l store_fpreg
3700:
3701: finex_exit:
3702: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3703: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3704: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3705:
3706: frestore FP_SRC(%a6)
3707:
3708: unlk %a6
3709: bra.l _real_inex
3710:
3711: finex_fmovcr:
3712: clr.l %d0
3713: mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3714: mov.b 1+EXC_CMDREG(%a6),%d1
3715: andi.l &0x0000007f,%d1 # pass rom offset
3716: bsr.l smovcr
3717: bra.b finex_save
3718:
3719: ########################################################################
3720:
3721: #
3722: # the hardware does not save the default result to memory on enabled
3723: # inexact exceptions. we do this here before passing control to
3724: # the user inexact handler.
3725: #
3726: # byte, word, and long destination format operations can pass
3727: # through here. so can double and single precision.
3728: # although packed opclass three operations can take inexact
3729: # exceptions, they won't pass through here since they are caught
3730: # first by the unsupported data format exception handler. that handler
3731: # sends them directly to _real_inex() if necessary.
3732: #
3733: finex_out:
3734:
3735: mov.b &NORM,STAG(%a6) # src is a NORM
3736:
3737: clr.l %d0
3738: mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3739:
3740: andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3741:
3742: lea FP_SRC(%a6),%a0 # pass ptr to src operand
3743:
3744: bsr.l fout # store the default result
3745:
3746: bra.b finex_exit
3747:
3748: #########################################################################
3749: # XDEF **************************************************************** #
3750: # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3751: # #
3752: # This handler should be the first code executed upon taking #
3753: # the FP DZ (divide-by-zero) exception in an operating system. #
3754: # #
3755: # XREF **************************************************************** #
3756: # _imem_read_long() - read instruction longword from memory #
3757: # fix_skewed_ops() - adjust fsave operand #
3758: # _real_dz() - "callout" exit point from FP DZ handler #
3759: # #
3760: # INPUT *************************************************************** #
3761: # - The system stack contains the FP DZ exception stack. #
3762: # - The fsave frame contains the source operand. #
3763: # #
3764: # OUTPUT ************************************************************** #
3765: # - The system stack contains the FP DZ exception stack. #
3766: # - The fsave frame contains the adjusted source operand. #
3767: # #
3768: # ALGORITHM *********************************************************** #
3769: # In a system where the DZ exception is enabled, the goal is to #
3770: # get to the handler specified at _real_dz(). But, on the 060, when the #
3771: # exception is taken, the input operand in the fsave state frame may #
3772: # be incorrect for some cases and need to be adjusted. So, this package #
3773: # adjusts the operand using fix_skewed_ops() and then branches to #
3774: # _real_dz(). #
3775: # #
3776: #########################################################################
3777:
3778: global _fpsp_dz
3779: _fpsp_dz:
3780:
3781: link.w %a6,&-LOCAL_SIZE # init stack frame
3782:
3783: fsave FP_SRC(%a6) # grab the "busy" frame
3784:
3785: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3786: fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3787: fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3788:
3789: # the FPIAR holds the "current PC" of the faulting instruction
3790: mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3791:
3792: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3793: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3794: bsr.l _imem_read_long # fetch the instruction words
3795: mov.l %d0,EXC_OPWORD(%a6)
3796:
3797: ##############################################################################
3798:
3799:
3800: # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3801: # this would be the case for opclass two operations with a source zero
3802: # in the sgl or dbl format.
3803: lea FP_SRC(%a6),%a0 # pass: ptr to src op
3804: bsr.l fix_skewed_ops # fix src op
3805:
3806: fdz_exit:
3807: fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3808: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3809: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3810:
3811: frestore FP_SRC(%a6)
3812:
3813: unlk %a6
3814: bra.l _real_dz
3815:
3816: #########################################################################
3817: # XDEF **************************************************************** #
3818: # _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3819: # exception when the "reduced" version of the #
3820: # FPSP is implemented that does not emulate #
3821: # FP unimplemented instructions. #
3822: # #
3823: # This handler should be the first code executed upon taking a #
3824: # "Line F Emulator" exception in an operating system integrating #
3825: # the reduced version of 060FPSP. #
3826: # #
3827: # XREF **************************************************************** #
3828: # _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3829: # _real_fline() - Handle all other cases (treated equally) #
3830: # #
3831: # INPUT *************************************************************** #
3832: # - The system stack contains a "Line F Emulator" exception #
3833: # stack frame. #
3834: # #
3835: # OUTPUT ************************************************************** #
3836: # - The system stack is unchanged. #
3837: # #
3838: # ALGORITHM *********************************************************** #
3839: # When a "Line F Emulator" exception occurs in a system where #
3840: # "FPU Unimplemented" instructions will not be emulated, the exception #
3841: # can occur because the FPU is disabled or the instruction is to be #
3842: # classified as "Line F". This module determines which case exists and #
3843: # calls the appropriate "callout". #
3844: # #
3845: #########################################################################
3846:
3847: global _fpsp_fline
3848: _fpsp_fline:
3849:
3850: # check to see if the FPU is disabled. if so, jump to the OS entry
3851: # point for that condition.
# 0x6(%sp) is the frame format/vector-offset word; 0x402c = format 4,
# vector offset 0x2c, which the 060 uses for "FPU disabled" faults
3852: cmpi.w 0x6(%sp),&0x402c
3853: beq.l _real_fpu_disabled
3854:
3855: bra.l _real_fline
3856:
3857: #########################################################################
3858: # XDEF **************************************************************** #
3859: # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3860: # #
3861: # XREF **************************************************************** #
3862: # inc_areg() - increment an address register #
3863: # dec_areg() - decrement an address register #
3864: # #
3865: # INPUT *************************************************************** #
3866: # d0 = number of bytes to adjust <ea> by #
3867: # #
3868: # OUTPUT ************************************************************** #
3869: # a0 = correct <ea> #
3870: # #
3871: # ALGORITHM *********************************************************** #
3872: # "Dummy" CALCulate Effective Address: #
3873: # The stacked <ea> for FP unimplemented instructions and opclass #
3874: # two packed instructions is correct with the exception of... #
3875: # #
3876: # 1) -(An) : The register is not updated regardless of size. #
3877: # Also, for extended precision and packed, the #
3878: # stacked <ea> value is 8 bytes too big. #
3879: # 2) (An)+ : The register is not updated. #
3880: # 3) #<data> : The upper longword of the immediate operand is #
3881: # stacked; b,w,l and s sizes are completely stacked. #
3882: # d, x, and p are not. #
3883: # #
3884: #########################################################################
3885:
3886: global _dcalc_ea
3887: _dcalc_ea:
3888: mov.l %d0, %a0 # move # bytes to %a0
3889:
3890: mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
3891: mov.l %d0, %d1 # make a copy
3892:
3893: andi.w &0x38, %d0 # extract mode field
3894: andi.l &0x7, %d1 # extract reg field
3895:
3896: cmpi.b %d0,&0x18 # is mode (An)+ ?
3897: beq.b dcea_pi # yes
3898:
3899: cmpi.b %d0,&0x20 # is mode -(An) ?
3900: beq.b dcea_pd # yes
3901:
3902: or.w %d1,%d0 # concat mode,reg
3903: cmpi.b %d0,&0x3c # is mode #<data>?
3904:
3905: beq.b dcea_imm # yes
3906:
3907: mov.l EXC_EA(%a6),%a0 # return <ea>
3908: rts
3909:
3910: # need to set immediate data flag here since we'll need to do
3911: # an imem_read to fetch this later.
3912: dcea_imm:
3913: mov.b &immed_flg,SPCOND_FLG(%a6)
# immediate data sits right after the 4-byte instruction at the saved FPIAR
3914: lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3915: rts
3916:
3917: # here, the <ea> is stacked correctly. however, we must update the
3918: # address register...
3919: dcea_pi:
3920: mov.l %a0,%d0 # pass amt to inc by
3921: bsr.l inc_areg # inc addr register
3922:
3923: mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3924: rts
3925:
3926: # the <ea> is stacked correctly for all but extended and packed which
3927: # the <ea>s are 8 bytes too large.
3928: # it would make no sense to have a pre-decrement to a7 in supervisor
3929: # mode so we don't even worry about this tricky case here : )
3930: dcea_pd:
3931: mov.l %a0,%d0 # pass amt to dec by
3932: bsr.l dec_areg # dec addr register
3933:
3934: mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3935:
3936: cmpi.b %d0,&0xc # is opsize ext or packed?
3937: beq.b dcea_pd2 # yes
3938: rts
3939: dcea_pd2:
3940: sub.l &0x8,%a0 # correct <ea>
3941: mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3942: rts
3943:
3944: #########################################################################
3945: # XDEF **************************************************************** #
3946: # _calc_ea_fout(): calculate correct stacked <ea> for extended #
3947: # and packed data opclass 3 operations. #
3948: # #
3949: # XREF **************************************************************** #
3950: # None #
3951: # #
3952: # INPUT *************************************************************** #
3953: # None #
3954: # #
3955: # OUTPUT ************************************************************** #
3956: # a0 = return correct effective address #
3957: # #
3958: # ALGORITHM *********************************************************** #
3959: # For opclass 3 extended and packed data operations, the <ea> #
3960: # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3961: # modes. Also, while we're at it, the address register itself must get #
3962: # updated. #
3963: # So, for -(an), we must subtract 8 off of the stacked <ea> value #
3964: # and return that value as the correct <ea> and store that value in An. #
3965: # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3966: # #
3967: #########################################################################
3968:
3969: # This calc_ea is currently used to retrieve the correct <ea>
3970: # for fmove outs of type extended and packed.
3971: global _calc_ea_fout
3972: _calc_ea_fout:
3973: mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
3974: mov.l %d0,%d1 # make a copy
3975:
3976: andi.w &0x38,%d0 # extract mode field
3977: andi.l &0x7,%d1 # extract reg field
3978:
3979: cmpi.b %d0,&0x18 # is mode (An)+ ?
3980: beq.b ceaf_pi # yes
3981:
3982: cmpi.b %d0,&0x20 # is mode -(An) ?
3983: beq.w ceaf_pd # yes
3984:
3985: mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3986: rts
3987:
3988: # (An)+ : extended and packed fmove out
3989: # : stacked <ea> is correct
3990: # : "An" not updated
# jump through a per-register table; a0-a1/a6/a7 live in the exception
# frame, a2-a5 are still live in their registers.
3991: ceaf_pi:
3992: mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3993: mov.l EXC_EA(%a6),%a0
3994: jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
3995:
3996: swbeg &0x8
3997: tbl_ceaf_pi:
3998: short ceaf_pi0 - tbl_ceaf_pi
3999: short ceaf_pi1 - tbl_ceaf_pi
4000: short ceaf_pi2 - tbl_ceaf_pi
4001: short ceaf_pi3 - tbl_ceaf_pi
4002: short ceaf_pi4 - tbl_ceaf_pi
4003: short ceaf_pi5 - tbl_ceaf_pi
4004: short ceaf_pi6 - tbl_ceaf_pi
4005: short ceaf_pi7 - tbl_ceaf_pi
4006:
4007: ceaf_pi0:
4008: addi.l &0xc,EXC_DREGS+0x8(%a6) # a0 saved in frame; An += 12
4009: rts
4010: ceaf_pi1:
4011: addi.l &0xc,EXC_DREGS+0xc(%a6) # a1 saved in frame; An += 12
4012: rts
4013: ceaf_pi2:
4014: add.l &0xc,%a2
4015: rts
4016: ceaf_pi3:
4017: add.l &0xc,%a3
4018: rts
4019: ceaf_pi4:
4020: add.l &0xc,%a4
4021: rts
4022: ceaf_pi5:
4023: add.l &0xc,%a5
4024: rts
4025: ceaf_pi6:
4026: addi.l &0xc,EXC_A6(%a6) # a6 saved in frame; An += 12
4027: rts
4028: ceaf_pi7:
# (a7)+ needs the "memory indirect a7" special-case flag for the caller
4029: mov.b &mia7_flg,SPCOND_FLG(%a6)
4030: addi.l &0xc,EXC_A7(%a6)
4031: rts
4032:
4033: # -(An) : extended and packed fmove out
4034: # : stacked <ea> = actual <ea> + 8
4035: # : "An" not updated
# fix both the returned <ea> (a0) and the stacked copy, then write the
# corrected address back into the proper An via the jump table.
4036: ceaf_pd:
4037: mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4038: mov.l EXC_EA(%a6),%a0
4039: sub.l &0x8,%a0
4040: sub.l &0x8,EXC_EA(%a6)
4041: jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
4042:
4043: swbeg &0x8
4044: tbl_ceaf_pd:
4045: short ceaf_pd0 - tbl_ceaf_pd
4046: short ceaf_pd1 - tbl_ceaf_pd
4047: short ceaf_pd2 - tbl_ceaf_pd
4048: short ceaf_pd3 - tbl_ceaf_pd
4049: short ceaf_pd4 - tbl_ceaf_pd
4050: short ceaf_pd5 - tbl_ceaf_pd
4051: short ceaf_pd6 - tbl_ceaf_pd
4052: short ceaf_pd7 - tbl_ceaf_pd
4053:
4054: ceaf_pd0:
4055: mov.l %a0,EXC_DREGS+0x8(%a6) # a0 saved in frame; An = <ea>
4056: rts
4057: ceaf_pd1:
4058: mov.l %a0,EXC_DREGS+0xc(%a6) # a1 saved in frame; An = <ea>
4059: rts
4060: ceaf_pd2:
4061: mov.l %a0,%a2
4062: rts
4063: ceaf_pd3:
4064: mov.l %a0,%a3
4065: rts
4066: ceaf_pd4:
4067: mov.l %a0,%a4
4068: rts
4069: ceaf_pd5:
4070: mov.l %a0,%a5
4071: rts
4072: ceaf_pd6:
4073: mov.l %a0,EXC_A6(%a6)
4074: rts
4075: ceaf_pd7:
# -(a7): flag the "memory decrement a7" special case for the caller
4076: mov.l %a0,EXC_A7(%a6)
4077: mov.b &mda7_flg,SPCOND_FLG(%a6)
4078: rts
4079:
4080: #
4081: # This table holds the offsets of the emulation routines for each individual
4082: # math operation relative to the address of this table. Included are
4083: # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4084: # this table is for the version of the 060FPSP without transcendentals.
4085: # The location within the table is determined by the extension bits of the
4086: # operation longword.
4087: #
# NOTE: "fin"/"fsin"/"fdin" below are the fmove/fsmove/fdmove emulation
# routines ("in" = move in), NOT the sine function — sine is one of the
# transcendentals, which resolve to tbl_unsupp (i.e. unimplemented) here.
4088:
4089: swbeg &109
4090: tbl_unsupp:
4091: long fin - tbl_unsupp # 00: fmove
4092: long fint - tbl_unsupp # 01: fint
4093: long tbl_unsupp - tbl_unsupp # 02: fsinh
4094: long fintrz - tbl_unsupp # 03: fintrz
4095: long fsqrt - tbl_unsupp # 04: fsqrt
4096: long tbl_unsupp - tbl_unsupp
4097: long tbl_unsupp - tbl_unsupp # 06: flognp1
4098: long tbl_unsupp - tbl_unsupp
4099: long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4100: long tbl_unsupp - tbl_unsupp # 09: ftanh
4101: long tbl_unsupp - tbl_unsupp # 0a: fatan
4102: long tbl_unsupp - tbl_unsupp
4103: long tbl_unsupp - tbl_unsupp # 0c: fasin
4104: long tbl_unsupp - tbl_unsupp # 0d: fatanh
4105: long tbl_unsupp - tbl_unsupp # 0e: fsin
4106: long tbl_unsupp - tbl_unsupp # 0f: ftan
4107: long tbl_unsupp - tbl_unsupp # 10: fetox
4108: long tbl_unsupp - tbl_unsupp # 11: ftwotox
4109: long tbl_unsupp - tbl_unsupp # 12: ftentox
4110: long tbl_unsupp - tbl_unsupp
4111: long tbl_unsupp - tbl_unsupp # 14: flogn
4112: long tbl_unsupp - tbl_unsupp # 15: flog10
4113: long tbl_unsupp - tbl_unsupp # 16: flog2
4114: long tbl_unsupp - tbl_unsupp
4115: long fabs - tbl_unsupp # 18: fabs
4116: long tbl_unsupp - tbl_unsupp # 19: fcosh
4117: long fneg - tbl_unsupp # 1a: fneg
4118: long tbl_unsupp - tbl_unsupp
4119: long tbl_unsupp - tbl_unsupp # 1c: facos
4120: long tbl_unsupp - tbl_unsupp # 1d: fcos
4121: long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4122: long tbl_unsupp - tbl_unsupp # 1f: fgetman
4123: long fdiv - tbl_unsupp # 20: fdiv
4124: long tbl_unsupp - tbl_unsupp # 21: fmod
4125: long fadd - tbl_unsupp # 22: fadd
4126: long fmul - tbl_unsupp # 23: fmul
4127: long fsgldiv - tbl_unsupp # 24: fsgldiv
4128: long tbl_unsupp - tbl_unsupp # 25: frem
4129: long tbl_unsupp - tbl_unsupp # 26: fscale
4130: long fsglmul - tbl_unsupp # 27: fsglmul
4131: long fsub - tbl_unsupp # 28: fsub
4132: long tbl_unsupp - tbl_unsupp
4133: long tbl_unsupp - tbl_unsupp
4134: long tbl_unsupp - tbl_unsupp
4135: long tbl_unsupp - tbl_unsupp
4136: long tbl_unsupp - tbl_unsupp
4137: long tbl_unsupp - tbl_unsupp
4138: long tbl_unsupp - tbl_unsupp
4139: long tbl_unsupp - tbl_unsupp # 30: fsincos
4140: long tbl_unsupp - tbl_unsupp # 31: fsincos
4141: long tbl_unsupp - tbl_unsupp # 32: fsincos
4142: long tbl_unsupp - tbl_unsupp # 33: fsincos
4143: long tbl_unsupp - tbl_unsupp # 34: fsincos
4144: long tbl_unsupp - tbl_unsupp # 35: fsincos
4145: long tbl_unsupp - tbl_unsupp # 36: fsincos
4146: long tbl_unsupp - tbl_unsupp # 37: fsincos
4147: long fcmp - tbl_unsupp # 38: fcmp
4148: long tbl_unsupp - tbl_unsupp
4149: long ftst - tbl_unsupp # 3a: ftst
4150: long tbl_unsupp - tbl_unsupp
4151: long tbl_unsupp - tbl_unsupp
4152: long tbl_unsupp - tbl_unsupp
4153: long tbl_unsupp - tbl_unsupp
4154: long tbl_unsupp - tbl_unsupp
4155: long fsin - tbl_unsupp # 40: fsmove (sgl-rounded fmove)
4156: long fssqrt - tbl_unsupp # 41: fssqrt
4157: long tbl_unsupp - tbl_unsupp
4158: long tbl_unsupp - tbl_unsupp
4159: long fdin - tbl_unsupp # 44: fdmove (dbl-rounded fmove)
4160: long fdsqrt - tbl_unsupp # 45: fdsqrt
4161: long tbl_unsupp - tbl_unsupp
4162: long tbl_unsupp - tbl_unsupp
4163: long tbl_unsupp - tbl_unsupp
4164: long tbl_unsupp - tbl_unsupp
4165: long tbl_unsupp - tbl_unsupp
4166: long tbl_unsupp - tbl_unsupp
4167: long tbl_unsupp - tbl_unsupp
4168: long tbl_unsupp - tbl_unsupp
4169: long tbl_unsupp - tbl_unsupp
4170: long tbl_unsupp - tbl_unsupp
4171: long tbl_unsupp - tbl_unsupp
4172: long tbl_unsupp - tbl_unsupp
4173: long tbl_unsupp - tbl_unsupp
4174: long tbl_unsupp - tbl_unsupp
4175: long tbl_unsupp - tbl_unsupp
4176: long tbl_unsupp - tbl_unsupp
4177: long tbl_unsupp - tbl_unsupp
4178: long tbl_unsupp - tbl_unsupp
4179: long fsabs - tbl_unsupp # 58: fsabs
4180: long tbl_unsupp - tbl_unsupp
4181: long fsneg - tbl_unsupp # 5a: fsneg
4182: long tbl_unsupp - tbl_unsupp
4183: long fdabs - tbl_unsupp # 5c: fdabs
4184: long tbl_unsupp - tbl_unsupp
4185: long fdneg - tbl_unsupp # 5e: fdneg
4186: long tbl_unsupp - tbl_unsupp
4187: long fsdiv - tbl_unsupp # 60: fsdiv
4188: long tbl_unsupp - tbl_unsupp
4189: long fsadd - tbl_unsupp # 62: fsadd
4190: long fsmul - tbl_unsupp # 63: fsmul
4191: long fddiv - tbl_unsupp # 64: fddiv
4192: long tbl_unsupp - tbl_unsupp
4193: long fdadd - tbl_unsupp # 66: fdadd
4194: long fdmul - tbl_unsupp # 67: fdmul
4195: long fssub - tbl_unsupp # 68: fssub
4196: long tbl_unsupp - tbl_unsupp
4197: long tbl_unsupp - tbl_unsupp
4198: long tbl_unsupp - tbl_unsupp
4199: long fdsub - tbl_unsupp # 6c: fdsub
4200:
4201: #################################################
4202: # Add this here so non-fp modules can compile.
4203: # (smovcr is called from fpsp_inex.)
# NOTE: this is a deliberate dead-end stub — "bra.b smovcr" branches to
# itself forever. The real fmovecr emulation lives in the full FPSP; this
# placeholder only exists to satisfy the link and should never be reached.
4204: global smovcr
4205: smovcr:
4206: bra.b smovcr
4207:
4208: #########################################################################
4209: # XDEF **************************************************************** #
4210: # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4211: # #
4212: # XREF **************************************************************** #
4213: # fetch_dreg() - fetch data register #
4214: # {i,d,}mem_read() - fetch data from memory #
4215: # _mem_write() - write data to memory #
4216: # iea_iacc() - instruction memory access error occurred #
4217: # iea_dacc() - data memory access error occurred #
4218: # restore() - restore An index regs if access error occurred #
4219: # #
4220: # INPUT *************************************************************** #
4221: # None #
4222: # #
4223: # OUTPUT ************************************************************** #
4224: # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4225: # d0 = size of dump #
4226: # d1 = Dn #
4227: # Else if instruction access error, #
4228: # d0 = FSLW #
4229: # Else if data access error, #
4230: # d0 = FSLW #
4231: # a0 = address of fault #
4232: # Else #
4233: # none. #
4234: # #
4235: # ALGORITHM *********************************************************** #
4236: # The effective address must be calculated since this is entered #
4237: # from an "Unimplemented Effective Address" exception handler. So, we #
4238: # have our own fcalc_ea() routine here. If an access error is flagged #
4239: # by a _{i,d,}mem_read() call, we must exit through the special #
4240: # handler. #
4241: # The data register is determined and its value loaded to get the #
4242: # string of FP registers affected. This value is used as an index into #
4243: # a lookup table such that we can determine the number of bytes #
4244: # involved. #
4245: # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4246: # to read in all FP values. Again, _mem_read() may fail and require a #
4247: # special exit. #
4248: # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4249: # to write all FP values. _mem_write() may also fail. #
4250: # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4251: # then we return the size of the dump and the string to the caller #
4252: # so that the move can occur outside of this routine. This special #
4253: # case is required so that moves to the system stack are handled #
4254: # correctly. #
4255: # #
4256: # DYNAMIC: #
4257: # fmovm.x dn, <ea> #
4258: # fmovm.x <ea>, dn #
4259: # #
4260: # <WORD 1> <WORD2> #
4261: # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4262: # #
4263: # & = (0): predecrement addressing mode #
4264: # (1): postincrement or control addressing mode #
4265: # @ = (0): move listed regs from memory to the FPU #
4266: # (1): move listed regs from the FPU to memory #
4267: # $$$ : index of data register holding reg select mask #
4268: # #
4269: # NOTES: #
4270: # If the data register holds a zero, then the #
4271: # instruction is a nop. #
4272: # #
4273: #########################################################################
4274:
4275: global fmovm_dynamic
4276: fmovm_dynamic:
4277:
4278: # extract the data register in which the bit string resides...
4279: mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4280: andi.w &0x70,%d1 # extract reg bits
4281: lsr.b &0x4,%d1 # shift into lo bits
4282:
4283: # fetch the bit string into d0...
4284: bsr.l fetch_dreg # fetch reg string
4285:
4286: andi.l &0x000000ff,%d0 # keep only lo byte
4287:
4288: mov.l %d0,-(%sp) # save strg
4289: mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
4290: mov.l %d0,-(%sp) # save size
4291: bsr.l fmovm_calc_ea # calculate <ea>
4292: mov.l (%sp)+,%d0 # restore size
4293: mov.l (%sp)+,%d1 # restore strg
4294:
4295: # if the bit string is a zero, then the operation is a no-op
4296: # but, make sure that we've calculated ea and advanced the opword pointer
4297: beq.w fmovm_data_done
4298:
4299: # separate move ins from move outs...
4300: btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4301: beq.w fmovm_data_in # it's a move out
4302:
4303: #############
4304: # MOVE OUT: #
4305: #############
4306: fmovm_data_out:
4307: btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4308: bne.w fmovm_out_ctrl # control
4309:
4310: ############################
4311: fmovm_out_predec:
4312: # for predecrement mode, the bit string is the opposite of both control
4313: # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4314: # here, we convert it to be just like the others...
4315: mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4316:
4317: btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4318: beq.b fmovm_out_ctrl # user
4319:
4320: fmovm_out_predec_s:
4321: cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4322: bne.b fmovm_out_ctrl
4323:
4324: # the operation was unfortunately an: fmovm.x dn,-(sp)
4325: # called from supervisor mode.
4326: # we're also passing "size" and "strg" back to the calling routine
4327: rts
4328:
4329: ############################
4330: fmovm_out_ctrl:
4331: mov.l %a0,%a1 # move <ea> to a1
4332:
4333: sub.l %d0,%sp # subtract size of dump
4334: lea (%sp),%a0
4335:
4336: tst.b %d1 # should FP0 be moved?
4337: bpl.b fmovm_out_ctrl_fp1 # no
4338:
4339: mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4340: mov.l 0x4+EXC_FP0(%a6),(%a0)+
4341: mov.l 0x8+EXC_FP0(%a6),(%a0)+
4342:
4343: fmovm_out_ctrl_fp1:
4344: lsl.b &0x1,%d1 # should FP1 be moved?
4345: bpl.b fmovm_out_ctrl_fp2 # no
4346:
4347: mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4348: mov.l 0x4+EXC_FP1(%a6),(%a0)+
4349: mov.l 0x8+EXC_FP1(%a6),(%a0)+
4350:
4351: fmovm_out_ctrl_fp2:
4352: lsl.b &0x1,%d1 # should FP2 be moved?
4353: bpl.b fmovm_out_ctrl_fp3 # no
4354:
4355: fmovm.x &0x20,(%a0) # yes
4356: add.l &0xc,%a0
4357:
4358: fmovm_out_ctrl_fp3:
4359: lsl.b &0x1,%d1 # should FP3 be moved?
4360: bpl.b fmovm_out_ctrl_fp4 # no
4361:
4362: fmovm.x &0x10,(%a0) # yes
4363: add.l &0xc,%a0
4364:
4365: fmovm_out_ctrl_fp4:
4366: lsl.b &0x1,%d1 # should FP4 be moved?
4367: bpl.b fmovm_out_ctrl_fp5 # no
4368:
4369: fmovm.x &0x08,(%a0) # yes
4370: add.l &0xc,%a0
4371:
4372: fmovm_out_ctrl_fp5:
4373: lsl.b &0x1,%d1 # should FP5 be moved?
4374: bpl.b fmovm_out_ctrl_fp6 # no
4375:
4376: fmovm.x &0x04,(%a0) # yes
4377: add.l &0xc,%a0
4378:
4379: fmovm_out_ctrl_fp6:
4380: lsl.b &0x1,%d1 # should FP6 be moved?
4381: bpl.b fmovm_out_ctrl_fp7 # no
4382:
4383: fmovm.x &0x02,(%a0) # yes
4384: add.l &0xc,%a0
4385:
4386: fmovm_out_ctrl_fp7:
4387: lsl.b &0x1,%d1 # should FP7 be moved?
4388: bpl.b fmovm_out_ctrl_done # no
4389:
4390: fmovm.x &0x01,(%a0) # yes
4391: add.l &0xc,%a0
4392:
4393: fmovm_out_ctrl_done:
4394: mov.l %a1,L_SCR1(%a6)
4395:
4396: lea (%sp),%a0 # pass: supervisor src
4397: mov.l %d0,-(%sp) # save size
4398: bsr.l _dmem_write # copy data to user mem
4399:
4400: mov.l (%sp)+,%d0
4401: add.l %d0,%sp # clear fpreg data from stack
4402:
4403: tst.l %d1 # did dstore err?
4404: bne.w fmovm_out_err # yes
4405:
4406: rts
4407:
4408: ############
4409: # MOVE IN: #
4410: ############
4411: fmovm_data_in:
4412: mov.l %a0,L_SCR1(%a6)
4413:
4414: sub.l %d0,%sp # make room for fpregs
4415: lea (%sp),%a1
4416:
4417: mov.l %d1,-(%sp) # save bit string for later
4418: mov.l %d0,-(%sp) # save # of bytes
4419:
4420: bsr.l _dmem_read # copy data from user mem
4421:
4422: mov.l (%sp)+,%d0 # retrieve # of bytes
4423:
4424: tst.l %d1 # did dfetch fail?
4425: bne.w fmovm_in_err # yes
4426:
4427: mov.l (%sp)+,%d1 # load bit string
4428:
4429: lea (%sp),%a0 # addr of stack
4430:
4431: tst.b %d1 # should FP0 be moved?
4432: bpl.b fmovm_data_in_fp1 # no
4433:
4434: mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4435: mov.l (%a0)+,0x4+EXC_FP0(%a6)
4436: mov.l (%a0)+,0x8+EXC_FP0(%a6)
4437:
4438: fmovm_data_in_fp1:
4439: lsl.b &0x1,%d1 # should FP1 be moved?
4440: bpl.b fmovm_data_in_fp2 # no
4441:
4442: mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4443: mov.l (%a0)+,0x4+EXC_FP1(%a6)
4444: mov.l (%a0)+,0x8+EXC_FP1(%a6)
4445:
4446: fmovm_data_in_fp2:
4447: lsl.b &0x1,%d1 # should FP2 be moved?
4448: bpl.b fmovm_data_in_fp3 # no
4449:
4450: fmovm.x (%a0)+,&0x20 # yes
4451:
4452: fmovm_data_in_fp3:
4453: lsl.b &0x1,%d1 # should FP3 be moved?
4454: bpl.b fmovm_data_in_fp4 # no
4455:
4456: fmovm.x (%a0)+,&0x10 # yes
4457:
4458: fmovm_data_in_fp4:
4459: lsl.b &0x1,%d1 # should FP4 be moved?
4460: bpl.b fmovm_data_in_fp5 # no
4461:
4462: fmovm.x (%a0)+,&0x08 # yes
4463:
4464: fmovm_data_in_fp5:
4465: lsl.b &0x1,%d1 # should FP5 be moved?
4466: bpl.b fmovm_data_in_fp6 # no
4467:
4468: fmovm.x (%a0)+,&0x04 # yes
4469:
4470: fmovm_data_in_fp6:
4471: lsl.b &0x1,%d1 # should FP6 be moved?
4472: bpl.b fmovm_data_in_fp7 # no
4473:
4474: fmovm.x (%a0)+,&0x02 # yes
4475:
4476: fmovm_data_in_fp7:
4477: lsl.b &0x1,%d1 # should FP7 be moved?
4478: bpl.b fmovm_data_in_done # no
4479:
4480: fmovm.x (%a0)+,&0x01 # yes
4481:
4482: fmovm_data_in_done:
4483: add.l %d0,%sp # remove fpregs from stack
4484: rts
4485:
4486: #####################################
4487:
4488: fmovm_data_done:
4489: rts
4490:
4491: ##############################################################################
4492:
4493: #
4494: # table indexed by the operation's bit string that gives the number
4495: # of bytes that will be moved.
4496: #
4497: # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4498: #
4499: tbl_fmovm_size:
4500: byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4501: byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4502: byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4503: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4504: byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4505: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4506: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4508: byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4509: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4511: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4513: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4514: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515: byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4516: byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4517: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4518: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4519: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4520: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4521: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4522: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4523: byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4524: byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4525: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4526: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4527: byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4528: byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4529: byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4530: byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4531: byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4532:
4533: #
4534: # table to convert a pre-decrement bit string into a post-increment
4535: # or control bit string.
4536: # ex: 0x00 ==> 0x00
4537: # 0x01 ==> 0x80
4538: # 0x02 ==> 0x40
4539: # .
4540: # .
4541: # 0xfd ==> 0xbf
4542: # 0xfe ==> 0x7f
4543: # 0xff ==> 0xff
4544: #
4545: tbl_fmovm_convert:
4546: byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4547: byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4548: byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4549: byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4550: byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4551: byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4552: byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4553: byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4554: byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4555: byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4556: byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4557: byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4558: byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4559: byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4560: byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4561: byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4562: byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4563: byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4564: byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4565: byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4566: byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4567: byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4568: byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4569: byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4570: byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4571: byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4572: byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4573: byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4574: byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4575: byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4576: byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4577: byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4578:
4579: global fmovm_calc_ea
4580: ###############################################
4581: # _fmovm_calc_ea: calculate effective address #
4582: ###############################################
4583: fmovm_calc_ea:
4584: mov.l %d0,%a0 # move # bytes to a0
4585:
4586: # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4587: # easily changed if they were inputs passed in registers.
4588: mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4589: mov.w %d0,%d1 # make a copy
4590:
4591: andi.w &0x3f,%d0 # extract mode field
4592: andi.l &0x7,%d1 # extract reg field
4593:
4594: # jump to the corresponding function for each {MODE,REG} pair.
4595: mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4596: jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4597:
4598: swbeg &64
4599: tbl_fea_mode:
4600: short tbl_fea_mode - tbl_fea_mode
4601: short tbl_fea_mode - tbl_fea_mode
4602: short tbl_fea_mode - tbl_fea_mode
4603: short tbl_fea_mode - tbl_fea_mode
4604: short tbl_fea_mode - tbl_fea_mode
4605: short tbl_fea_mode - tbl_fea_mode
4606: short tbl_fea_mode - tbl_fea_mode
4607: short tbl_fea_mode - tbl_fea_mode
4608:
4609: short tbl_fea_mode - tbl_fea_mode
4610: short tbl_fea_mode - tbl_fea_mode
4611: short tbl_fea_mode - tbl_fea_mode
4612: short tbl_fea_mode - tbl_fea_mode
4613: short tbl_fea_mode - tbl_fea_mode
4614: short tbl_fea_mode - tbl_fea_mode
4615: short tbl_fea_mode - tbl_fea_mode
4616: short tbl_fea_mode - tbl_fea_mode
4617:
4618: short faddr_ind_a0 - tbl_fea_mode
4619: short faddr_ind_a1 - tbl_fea_mode
4620: short faddr_ind_a2 - tbl_fea_mode
4621: short faddr_ind_a3 - tbl_fea_mode
4622: short faddr_ind_a4 - tbl_fea_mode
4623: short faddr_ind_a5 - tbl_fea_mode
4624: short faddr_ind_a6 - tbl_fea_mode
4625: short faddr_ind_a7 - tbl_fea_mode
4626:
4627: short faddr_ind_p_a0 - tbl_fea_mode
4628: short faddr_ind_p_a1 - tbl_fea_mode
4629: short faddr_ind_p_a2 - tbl_fea_mode
4630: short faddr_ind_p_a3 - tbl_fea_mode
4631: short faddr_ind_p_a4 - tbl_fea_mode
4632: short faddr_ind_p_a5 - tbl_fea_mode
4633: short faddr_ind_p_a6 - tbl_fea_mode
4634: short faddr_ind_p_a7 - tbl_fea_mode
4635:
4636: short faddr_ind_m_a0 - tbl_fea_mode
4637: short faddr_ind_m_a1 - tbl_fea_mode
4638: short faddr_ind_m_a2 - tbl_fea_mode
4639: short faddr_ind_m_a3 - tbl_fea_mode
4640: short faddr_ind_m_a4 - tbl_fea_mode
4641: short faddr_ind_m_a5 - tbl_fea_mode
4642: short faddr_ind_m_a6 - tbl_fea_mode
4643: short faddr_ind_m_a7 - tbl_fea_mode
4644:
4645: short faddr_ind_disp_a0 - tbl_fea_mode
4646: short faddr_ind_disp_a1 - tbl_fea_mode
4647: short faddr_ind_disp_a2 - tbl_fea_mode
4648: short faddr_ind_disp_a3 - tbl_fea_mode
4649: short faddr_ind_disp_a4 - tbl_fea_mode
4650: short faddr_ind_disp_a5 - tbl_fea_mode
4651: short faddr_ind_disp_a6 - tbl_fea_mode
4652: short faddr_ind_disp_a7 - tbl_fea_mode
4653:
4654: short faddr_ind_ext - tbl_fea_mode
4655: short faddr_ind_ext - tbl_fea_mode
4656: short faddr_ind_ext - tbl_fea_mode
4657: short faddr_ind_ext - tbl_fea_mode
4658: short faddr_ind_ext - tbl_fea_mode
4659: short faddr_ind_ext - tbl_fea_mode
4660: short faddr_ind_ext - tbl_fea_mode
4661: short faddr_ind_ext - tbl_fea_mode
4662:
4663: short fabs_short - tbl_fea_mode
4664: short fabs_long - tbl_fea_mode
4665: short fpc_ind - tbl_fea_mode
4666: short fpc_ind_ext - tbl_fea_mode
4667: short tbl_fea_mode - tbl_fea_mode
4668: short tbl_fea_mode - tbl_fea_mode
4669: short tbl_fea_mode - tbl_fea_mode
4670: short tbl_fea_mode - tbl_fea_mode
4671:
4672: ###################################
4673: # Address register indirect: (An) #
4674: ###################################
4675: faddr_ind_a0:
4676: mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
4677: rts
4678:
4679: faddr_ind_a1:
4680: mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
4681: rts
4682:
4683: faddr_ind_a2:
4684: mov.l %a2,%a0 # Get current a2
4685: rts
4686:
4687: faddr_ind_a3:
4688: mov.l %a3,%a0 # Get current a3
4689: rts
4690:
4691: faddr_ind_a4:
4692: mov.l %a4,%a0 # Get current a4
4693: rts
4694:
4695: faddr_ind_a5:
4696: mov.l %a5,%a0 # Get current a5
4697: rts
4698:
4699: faddr_ind_a6:
4700: mov.l (%a6),%a0 # Get current a6
4701: rts
4702:
4703: faddr_ind_a7:
4704: mov.l EXC_A7(%a6),%a0 # Get current a7
4705: rts
4706:
4707: #####################################################
4708: # Address register indirect w/ postincrement: (An)+ #
4709: #####################################################
4710: faddr_ind_p_a0:
4711: mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4712: mov.l %d0,%d1
4713: add.l %a0,%d1 # Increment
4714: mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
4715: mov.l %d0,%a0
4716: rts
4717:
4718: faddr_ind_p_a1:
4719: mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4720: mov.l %d0,%d1
4721: add.l %a0,%d1 # Increment
4722: mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
4723: mov.l %d0,%a0
4724: rts
4725:
4726: faddr_ind_p_a2:
4727: mov.l %a2,%d0 # Get current a2
4728: mov.l %d0,%d1
4729: add.l %a0,%d1 # Increment
4730: mov.l %d1,%a2 # Save incr value
4731: mov.l %d0,%a0
4732: rts
4733:
4734: faddr_ind_p_a3:
4735: mov.l %a3,%d0 # Get current a3
4736: mov.l %d0,%d1
4737: add.l %a0,%d1 # Increment
4738: mov.l %d1,%a3 # Save incr value
4739: mov.l %d0,%a0
4740: rts
4741:
4742: faddr_ind_p_a4:
4743: mov.l %a4,%d0 # Get current a4
4744: mov.l %d0,%d1
4745: add.l %a0,%d1 # Increment
4746: mov.l %d1,%a4 # Save incr value
4747: mov.l %d0,%a0
4748: rts
4749:
4750: faddr_ind_p_a5:
4751: mov.l %a5,%d0 # Get current a5
4752: mov.l %d0,%d1
4753: add.l %a0,%d1 # Increment
4754: mov.l %d1,%a5 # Save incr value
4755: mov.l %d0,%a0
4756: rts
4757:
4758: faddr_ind_p_a6:
4759: mov.l (%a6),%d0 # Get current a6
4760: mov.l %d0,%d1
4761: add.l %a0,%d1 # Increment
4762: mov.l %d1,(%a6) # Save incr value
4763: mov.l %d0,%a0
4764: rts
4765:
4766: faddr_ind_p_a7:
4767: mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4768:
4769: mov.l EXC_A7(%a6),%d0 # Get current a7
4770: mov.l %d0,%d1
4771: add.l %a0,%d1 # Increment
4772: mov.l %d1,EXC_A7(%a6) # Save incr value
4773: mov.l %d0,%a0
4774: rts
4775:
4776: ####################################################
4777: # Address register indirect w/ predecrement: -(An) #
4778: ####################################################
4779: faddr_ind_m_a0:
4780: mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4781: sub.l %a0,%d0 # Decrement
4782: mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
4783: mov.l %d0,%a0
4784: rts
4785:
4786: faddr_ind_m_a1:
4787: mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4788: sub.l %a0,%d0 # Decrement
4789: mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
4790: mov.l %d0,%a0
4791: rts
4792:
4793: faddr_ind_m_a2:
4794: mov.l %a2,%d0 # Get current a2
4795: sub.l %a0,%d0 # Decrement
4796: mov.l %d0,%a2 # Save decr value
4797: mov.l %d0,%a0
4798: rts
4799:
4800: faddr_ind_m_a3:
4801: mov.l %a3,%d0 # Get current a3
4802: sub.l %a0,%d0 # Decrement
4803: mov.l %d0,%a3 # Save decr value
4804: mov.l %d0,%a0
4805: rts
4806:
4807: faddr_ind_m_a4:
4808: mov.l %a4,%d0 # Get current a4
4809: sub.l %a0,%d0 # Decrement
4810: mov.l %d0,%a4 # Save decr value
4811: mov.l %d0,%a0
4812: rts
4813:
4814: faddr_ind_m_a5:
4815: mov.l %a5,%d0 # Get current a5
4816: sub.l %a0,%d0 # Decrement
4817: mov.l %d0,%a5 # Save decr value
4818: mov.l %d0,%a0
4819: rts
4820:
4821: faddr_ind_m_a6:
4822: mov.l (%a6),%d0 # Get current a6
4823: sub.l %a0,%d0 # Decrement
4824: mov.l %d0,(%a6) # Save decr value
4825: mov.l %d0,%a0
4826: rts
4827:
4828: faddr_ind_m_a7:
4829: mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4830:
4831: mov.l EXC_A7(%a6),%d0 # Get current a7
4832: sub.l %a0,%d0 # Decrement
4833: mov.l %d0,EXC_A7(%a6) # Save decr value
4834: mov.l %d0,%a0
4835: rts
4836:
4837: ########################################################
4838: # Address register indirect w/ displacement: (d16, An) #
4839: ########################################################
4840: faddr_ind_disp_a0:
4841: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4842: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4843: bsr.l _imem_read_word
4844:
4845: tst.l %d1 # did ifetch fail?
4846: bne.l iea_iacc # yes
4847:
4848: mov.w %d0,%a0 # sign extend displacement
4849:
4850: add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
4851: rts
4852:
4853: faddr_ind_disp_a1:
4854: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4855: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4856: bsr.l _imem_read_word
4857:
4858: tst.l %d1 # did ifetch fail?
4859: bne.l iea_iacc # yes
4860:
4861: mov.w %d0,%a0 # sign extend displacement
4862:
4863: add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
4864: rts
4865:
4866: faddr_ind_disp_a2:
4867: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4868: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4869: bsr.l _imem_read_word
4870:
4871: tst.l %d1 # did ifetch fail?
4872: bne.l iea_iacc # yes
4873:
4874: mov.w %d0,%a0 # sign extend displacement
4875:
4876: add.l %a2,%a0 # a2 + d16
4877: rts
4878:
4879: faddr_ind_disp_a3:
4880: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4881: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4882: bsr.l _imem_read_word
4883:
4884: tst.l %d1 # did ifetch fail?
4885: bne.l iea_iacc # yes
4886:
4887: mov.w %d0,%a0 # sign extend displacement
4888:
4889: add.l %a3,%a0 # a3 + d16
4890: rts
4891:
4892: faddr_ind_disp_a4:
4893: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4894: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4895: bsr.l _imem_read_word
4896:
4897: tst.l %d1 # did ifetch fail?
4898: bne.l iea_iacc # yes
4899:
4900: mov.w %d0,%a0 # sign extend displacement
4901:
4902: add.l %a4,%a0 # a4 + d16
4903: rts
4904:
4905: faddr_ind_disp_a5:
4906: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4907: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4908: bsr.l _imem_read_word
4909:
4910: tst.l %d1 # did ifetch fail?
4911: bne.l iea_iacc # yes
4912:
4913: mov.w %d0,%a0 # sign extend displacement
4914:
4915: add.l %a5,%a0 # a5 + d16
4916: rts
4917:
4918: faddr_ind_disp_a6:
4919: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4920: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4921: bsr.l _imem_read_word
4922:
4923: tst.l %d1 # did ifetch fail?
4924: bne.l iea_iacc # yes
4925:
4926: mov.w %d0,%a0 # sign extend displacement
4927:
4928: add.l (%a6),%a0 # a6 + d16
4929: rts
4930:
4931: faddr_ind_disp_a7:
4932: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4933: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4934: bsr.l _imem_read_word
4935:
4936: tst.l %d1 # did ifetch fail?
4937: bne.l iea_iacc # yes
4938:
4939: mov.w %d0,%a0 # sign extend displacement
4940:
4941: add.l EXC_A7(%a6),%a0 # a7 + d16
4942: rts
4943:
4944: ########################################################################
4945: # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4946: # " " " w/ " (base displacement): (bd, An, Xn) #
4947: # Memory indirect postindexed: ([bd, An], Xn, od) #
4948: # Memory indirect preindexed: ([bd, An, Xn], od) #
4949: ########################################################################
4950: faddr_ind_ext:
4951: addq.l &0x8,%d1
4952: bsr.l fetch_dreg # fetch base areg
4953: mov.l %d0,-(%sp)
4954:
4955: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4956: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4957: bsr.l _imem_read_word # fetch extword in d0
4958:
4959: tst.l %d1 # did ifetch fail?
4960: bne.l iea_iacc # yes
4961:
4962: mov.l (%sp)+,%a0
4963:
4964: btst &0x8,%d0
4965: bne.w fcalc_mem_ind
4966:
4967: mov.l %d0,L_SCR1(%a6) # hold opword
4968:
4969: mov.l %d0,%d1
4970: rol.w &0x4,%d1
4971: andi.w &0xf,%d1 # extract index regno
4972:
4973: # count on fetch_dreg() not to alter a0...
4974: bsr.l fetch_dreg # fetch index
4975:
4976: mov.l %d2,-(%sp) # save d2
4977: mov.l L_SCR1(%a6),%d2 # fetch opword
4978:
4979: btst &0xb,%d2 # is it word or long?
4980: bne.b faii8_long
4981: ext.l %d0 # sign extend word index
4982: faii8_long:
4983: mov.l %d2,%d1
4984: rol.w &0x7,%d1
4985: andi.l &0x3,%d1 # extract scale value
4986:
4987: lsl.l %d1,%d0 # shift index by scale
4988:
4989: extb.l %d2 # sign extend displacement
4990: add.l %d2,%d0 # index + disp
4991: add.l %d0,%a0 # An + (index + disp)
4992:
4993: mov.l (%sp)+,%d2 # restore old d2
4994: rts
4995:
4996: ###########################
4997: # Absolute short: (XXX).W #
4998: ###########################
4999: fabs_short:
5000: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5001: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5002: bsr.l _imem_read_word # fetch short address
5003:
5004: tst.l %d1 # did ifetch fail?
5005: bne.l iea_iacc # yes
5006:
5007: mov.w %d0,%a0 # return <ea> in a0
5008: rts
5009:
5010: ##########################
5011: # Absolute long: (XXX).L #
5012: ##########################
5013: fabs_long:
5014: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5015: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5016: bsr.l _imem_read_long # fetch long address
5017:
5018: tst.l %d1 # did ifetch fail?
5019: bne.l iea_iacc # yes
5020:
5021: mov.l %d0,%a0 # return <ea> in a0
5022: rts
5023:
5024: #######################################################
5025: # Program counter indirect w/ displacement: (d16, PC) #
5026: #######################################################
5027: fpc_ind:
5028: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5029: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5030: bsr.l _imem_read_word # fetch word displacement
5031:
5032: tst.l %d1 # did ifetch fail?
5033: bne.l iea_iacc # yes
5034:
5035: mov.w %d0,%a0 # sign extend displacement
5036:
5037: add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
5038:
5039: # _imem_read_word() increased the extwptr by 2. need to adjust here.
5040: subq.l &0x2,%a0 # adjust <ea>
5041: rts
5042:
5043: ##########################################################
5044: # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5045: # " " w/ " (base displacement): (bd, PC, An) #
5046: # PC memory indirect postindexed: ([bd, PC], Xn, od) #
5047: # PC memory indirect preindexed: ([bd, PC, Xn], od) #
5048: ##########################################################
5049: fpc_ind_ext:
5050: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5051: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5052: bsr.l _imem_read_word # fetch ext word
5053:
5054: tst.l %d1 # did ifetch fail?
5055: bne.l iea_iacc # yes
5056:
5057: mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
5058: subq.l &0x2,%a0 # adjust base
5059:
5060: btst &0x8,%d0 # is disp only 8 bits?
5061: bne.w fcalc_mem_ind # calc memory indirect
5062:
5063: mov.l %d0,L_SCR1(%a6) # store opword
5064:
5065: mov.l %d0,%d1 # make extword copy
5066: rol.w &0x4,%d1 # rotate reg num into place
5067: andi.w &0xf,%d1 # extract register number
5068:
5069: # count on fetch_dreg() not to alter a0...
5070: bsr.l fetch_dreg # fetch index
5071:
5072: mov.l %d2,-(%sp) # save d2
5073: mov.l L_SCR1(%a6),%d2 # fetch opword
5074:
5075: btst &0xb,%d2 # is index word or long?
5076: bne.b fpii8_long # long
5077: ext.l %d0 # sign extend word index
5078: fpii8_long:
5079: mov.l %d2,%d1
5080: rol.w &0x7,%d1 # rotate scale value into place
5081: andi.l &0x3,%d1 # extract scale value
5082:
5083: lsl.l %d1,%d0 # shift index by scale
5084:
5085: extb.l %d2 # sign extend displacement
5086: add.l %d2,%d0 # disp + index
5087: add.l %d0,%a0 # An + (index + disp)
5088:
5089: mov.l (%sp)+,%d2 # restore temp register
5090: rts
5091:
5092: # d2 = index
5093: # d3 = base
5094: # d4 = od
5095: # d5 = extword
5096: fcalc_mem_ind:
5097: btst &0x6,%d0 # is the index suppressed?
5098: beq.b fcalc_index
5099:
5100: movm.l &0x3c00,-(%sp) # save d2-d5
5101:
5102: mov.l %d0,%d5 # put extword in d5
5103: mov.l %a0,%d3 # put base in d3
5104:
5105: clr.l %d2 # yes, so index = 0
5106: bra.b fbase_supp_ck
5107:
5108: # index:
5109: fcalc_index:
5110: mov.l %d0,L_SCR1(%a6) # save d0 (opword)
5111: bfextu %d0{&16:&4},%d1 # fetch dreg index
5112: bsr.l fetch_dreg
5113:
5114: movm.l &0x3c00,-(%sp) # save d2-d5
5115: mov.l %d0,%d2 # put index in d2
5116: mov.l L_SCR1(%a6),%d5
5117: mov.l %a0,%d3
5118:
5119: btst &0xb,%d5 # is index word or long?
5120: bne.b fno_ext
5121: ext.l %d2
5122:
5123: fno_ext:
5124: bfextu %d5{&21:&2},%d0
5125: lsl.l %d0,%d2
5126:
5127: # base address (passed as parameter in d3):
5128: # we clear the value here if it should actually be suppressed.
5129: fbase_supp_ck:
5130: btst &0x7,%d5 # is the bd suppressed?
5131: beq.b fno_base_sup
5132: clr.l %d3
5133:
5134: # base displacement:
5135: fno_base_sup:
5136: bfextu %d5{&26:&2},%d0 # get bd size
5137: # beq.l fmovm_error # if (size == 0) it's reserved
5138:
5139: cmpi.b %d0,&0x2
5140: blt.b fno_bd
5141: beq.b fget_word_bd
5142:
5143: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5144: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5145: bsr.l _imem_read_long
5146:
5147: tst.l %d1 # did ifetch fail?
5148: bne.l fcea_iacc # yes
5149:
5150: bra.b fchk_ind
5151:
5152: fget_word_bd:
5153: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5154: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5155: bsr.l _imem_read_word
5156:
5157: tst.l %d1 # did ifetch fail?
5158: bne.l fcea_iacc # yes
5159:
5160: ext.l %d0 # sign extend bd
5161:
5162: fchk_ind:
5163: add.l %d0,%d3 # base += bd
5164:
5165: # outer displacement:
5166: fno_bd:
5167: bfextu %d5{&30:&2},%d0 # is od suppressed?
5168: beq.w faii_bd
5169:
5170: cmpi.b %d0,&0x2
5171: blt.b fnull_od
5172: beq.b fword_od
5173:
5174: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5175: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5176: bsr.l _imem_read_long
5177:
5178: tst.l %d1 # did ifetch fail?
5179: bne.l fcea_iacc # yes
5180:
5181: bra.b fadd_them
5182:
5183: fword_od:
5184: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5185: addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5186: bsr.l _imem_read_word
5187:
5188: tst.l %d1 # did ifetch fail?
5189: bne.l fcea_iacc # yes
5190:
5191: ext.l %d0 # sign extend od
5192: bra.b fadd_them
5193:
5194: fnull_od:
5195: clr.l %d0
5196:
5197: fadd_them:
5198: mov.l %d0,%d4
5199:
5200: btst &0x2,%d5 # pre or post indexing?
5201: beq.b fpre_indexed
5202:
5203: mov.l %d3,%a0
5204: bsr.l _dmem_read_long
5205:
5206: tst.l %d1 # did dfetch fail?
5207: bne.w fcea_err # yes
5208:
5209: add.l %d2,%d0 # <ea> += index
5210: add.l %d4,%d0 # <ea> += od
5211: bra.b fdone_ea
5212:
5213: fpre_indexed:
5214: add.l %d2,%d3 # preindexing
5215: mov.l %d3,%a0
5216: bsr.l _dmem_read_long
5217:
5218: tst.l %d1 # did dfetch fail?
5219: bne.w fcea_err # yes
5220:
5221: add.l %d4,%d0 # ea += od
5222: bra.b fdone_ea
5223:
5224: faii_bd:
5225: add.l %d2,%d3 # ea = (base + bd) + index
5226: mov.l %d3,%d0
5227: fdone_ea:
5228: mov.l %d0,%a0
5229:
5230: movm.l (%sp)+,&0x003c # restore d2-d5
5231: rts
5232:
5233: #########################################################
5234: fcea_err:
5235: mov.l %d3,%a0
5236:
5237: movm.l (%sp)+,&0x003c # restore d2-d5
5238: mov.w &0x0101,%d0
5239: bra.l iea_dacc
5240:
5241: fcea_iacc:
5242: movm.l (%sp)+,&0x003c # restore d2-d5
5243: bra.l iea_iacc
5244:
5245: fmovm_out_err:
5246: bsr.l restore
5247: mov.w &0x00e1,%d0
5248: bra.b fmovm_err
5249:
5250: fmovm_in_err:
5251: bsr.l restore
5252: mov.w &0x0161,%d0
5253:
5254: fmovm_err:
5255: mov.l L_SCR1(%a6),%a0
5256: bra.l iea_dacc
5257:
5258: #########################################################################
5259: # XDEF **************************************************************** #
5260: # fmovm_ctrl(): emulate fmovm.l of control registers instr #
5261: # #
5262: # XREF **************************************************************** #
5263: # _imem_read_long() - read longword from memory #
5264: # iea_iacc() - _imem_read_long() failed; error recovery #
5265: # #
5266: # INPUT *************************************************************** #
5267: # None #
5268: # #
5269: # OUTPUT ************************************************************** #
5270: # If _imem_read_long() doesn't fail: #
5271: # USER_FPCR(a6) = new FPCR value #
5272: # USER_FPSR(a6) = new FPSR value #
5273: # USER_FPIAR(a6) = new FPIAR value #
5274: # #
5275: # ALGORITHM *********************************************************** #
5276: # Decode the instruction type by looking at the extension word #
5277: # in order to see how many control registers to fetch from memory. #
5278: # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5279: # the special access error exit handler iea_iacc(). #
5280: # #
5281: # Instruction word decoding: #
5282: # #
5283: # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
5284: # #
5285: # WORD1 WORD2 #
5286: # 1111 0010 00 111100 100$ $$00 0000 0000 #
5287: # #
5288: # $$$ (100): FPCR #
5289: # (010): FPSR #
5290: # (001): FPIAR #
5291: # (000): FPIAR #
5292: # #
5293: #########################################################################
5294:
5295: global fmovm_ctrl
5296: fmovm_ctrl:
5297: mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
5298: cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
5299: beq.w fctrl_in_7 # yes
5300: cmpi.b %d0,&0x98 # fpcr & fpsr ?
5301: beq.w fctrl_in_6 # yes
5302: cmpi.b %d0,&0x94 # fpcr & fpiar ?
5303: beq.b fctrl_in_5 # yes
5304:
5305: # fmovem.l #<data>, fpsr/fpiar
5306: fctrl_in_3:
5307: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5308: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5309: bsr.l _imem_read_long # fetch FPSR from mem
5310:
5311: tst.l %d1 # did ifetch fail?
5312: bne.l iea_iacc # yes
5313:
5314: mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
5315: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5316: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5317: bsr.l _imem_read_long # fetch FPIAR from mem
5318:
5319: tst.l %d1 # did ifetch fail?
5320: bne.l iea_iacc # yes
5321:
5322: mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5323: rts
5324:
5325: # fmovem.l #<data>, fpcr/fpiar
5326: fctrl_in_5:
5327: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5328: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5329: bsr.l _imem_read_long # fetch FPCR from mem
5330:
5331: tst.l %d1 # did ifetch fail?
5332: bne.l iea_iacc # yes
5333:
5334: mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
5335: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5336: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5337: bsr.l _imem_read_long # fetch FPIAR from mem
5338:
5339: tst.l %d1 # did ifetch fail?
5340: bne.l iea_iacc # yes
5341:
5342: mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5343: rts
5344:
5345: # fmovem.l #<data>, fpcr/fpsr
5346: fctrl_in_6:
5347: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5348: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5349: bsr.l _imem_read_long # fetch immediate FPCR value from instr stream
5350:
5351: tst.l %d1 # did ifetch fail?
5352: bne.l iea_iacc # yes; handle instr access error
5353:
5354: mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack frame
5355: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5356: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5357: bsr.l _imem_read_long # fetch immediate FPSR value from instr stream
5358:
5359: tst.l %d1 # did ifetch fail?
5360: bne.l iea_iacc # yes; handle instr access error
5361:
5362: mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack frame
5363: rts
5364:
5365: # fmovem.l #<data>, fpcr/fpsr/fpiar
5366: fctrl_in_7:
5367: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5368: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5369: bsr.l _imem_read_long # fetch immediate FPCR value from instr stream
5370:
5371: tst.l %d1 # did ifetch fail?
5372: bne.l iea_iacc # yes; handle instr access error
5373:
5374: mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack frame
5375: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5376: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5377: bsr.l _imem_read_long # fetch immediate FPSR value from instr stream
5378:
5379: tst.l %d1 # did ifetch fail?
5380: bne.l iea_iacc # yes; handle instr access error
5381:
5382: mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack frame
5383: mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5384: addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5385: bsr.l _imem_read_long # fetch immediate FPIAR value from instr stream
5386:
5387: tst.l %d1 # did ifetch fail?
5388: bne.l iea_iacc # yes; handle instr access error
5389:
5390: mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack frame
5391: rts
5392:
5393: ##########################################################################
5394:
5395: #########################################################################
5396: # XDEF **************************************************************** #
5397: # addsub_scaler2(): scale inputs to fadd/fsub such that no #
5398: # OVFL/UNFL exceptions will result #
5399: # #
5400: # XREF **************************************************************** #
5401: # norm() - normalize mantissa after adjusting exponent #
5402: # #
5403: # INPUT *************************************************************** #
5404: # FP_SRC(a6) = fp op1(src) #
5405: # FP_DST(a6) = fp op2(dst) #
5406: # #
5407: # OUTPUT ************************************************************** #
5408: # FP_SRC(a6) = fp op1 scaled(src) #
5409: # FP_DST(a6) = fp op2 scaled(dst) #
5410: # d0 = scale amount #
5411: # #
5412: # ALGORITHM *********************************************************** #
5413: # If the DST exponent is > the SRC exponent, set the DST exponent #
5414: # equal to 0x3fff and scale the SRC exponent by the value that the #
5415: # DST exponent was scaled by. If the SRC exponent is greater or equal, #
5416: # do the opposite. Return this scale factor in d0. #
5417: # If the two exponents differ by > the number of mantissa bits #
5418: # plus two, then set the smallest exponent to a very small value as a #
5419: # quick shortcut. #
5420: # #
5421: #########################################################################
5422:
5423: global addsub_scaler2
5424: addsub_scaler2:
5425: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # FP_SCR0 = working copy of src
5426: mov.l DST_HI(%a1),FP_SCR1_HI(%a6) # FP_SCR1 = working copy of dst
5427: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
5428: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
5429: mov.w SRC_EX(%a0),%d0
5430: mov.w DST_EX(%a1),%d1
5431: mov.w %d0,FP_SCR0_EX(%a6)
5432: mov.w %d1,FP_SCR1_EX(%a6)
5433:
5434: andi.w &0x7fff,%d0 # strip sign bits
5435: andi.w &0x7fff,%d1
5436: mov.w %d0,L_SCR1(%a6) # store src exponent
5437: mov.w %d1,2+L_SCR1(%a6) # store dst exponent
5438:
5439: cmp.w %d0, %d1 # is src exp >= dst exp?
5440: bge.l src_exp_ge2
5441:
5442: # dst exp is > src exp; scale dst to exp = 0x3fff
5443: dst_exp_gt2:
5444: bsr.l scale_to_zero_dst
5445: mov.l %d0,-(%sp) # save scale factor
5446:
5447: cmpi.b STAG(%a6),&DENORM # is src denormalized?
5448: bne.b cmpexp12
5449:
5450: lea FP_SCR0(%a6),%a0 # pass ptr to src copy
5451: bsr.l norm # normalize the denorm; result is new exp
5452: neg.w %d0 # new exp = -(shft val)
5453: mov.w %d0,L_SCR1(%a6) # insert new src exp
5454:
5455: cmpexp12:
5456: mov.w 2+L_SCR1(%a6),%d0
5457: subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5458:
5459: cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
5460: bge.b quick_scale12
5461:
5462: mov.w L_SCR1(%a6),%d0
5463: add.w 0x2(%sp),%d0 # scale src exponent by scale factor
5464: mov.w FP_SCR0_EX(%a6),%d1
5465: and.w &0x8000,%d1 # keep src sign bit
5466: or.w %d1,%d0 # concat {sgn,new exp}
5467: mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
5468:
5469: mov.l (%sp)+,%d0 # return SCALE factor
5470: rts
5471:
5472: quick_scale12:
5473: andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5474: bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1 (tiny; exact value won't matter)
5475:
5476: mov.l (%sp)+,%d0 # return SCALE factor
5477: rts
5478:
5479: # src exp is >= dst exp; scale src to exp = 0x3fff
5480: src_exp_ge2:
5481: bsr.l scale_to_zero_src
5482: mov.l %d0,-(%sp) # save scale factor
5483:
5484: cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
5485: bne.b cmpexp22
5486: lea FP_SCR1(%a6),%a0 # pass ptr to dst copy
5487: bsr.l norm # normalize the denorm; result is new exp
5488: neg.w %d0 # new exp = -(shft val)
5489: mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
5490:
5491: cmpexp22:
5492: mov.w L_SCR1(%a6),%d0
5493: subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5494:
5495: cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
5496: bge.b quick_scale22
5497:
5498: mov.w 2+L_SCR1(%a6),%d0
5499: add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
5500: mov.w FP_SCR1_EX(%a6),%d1
5501: andi.w &0x8000,%d1 # keep dst sign bit
5502: or.w %d1,%d0 # concat {sgn,new exp}
5503: mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
5504:
5505: mov.l (%sp)+,%d0 # return SCALE factor
5506: rts
5507:
5508: quick_scale22:
5509: andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5510: bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1 (tiny; exact value won't matter)
5511:
5512: mov.l (%sp)+,%d0 # return SCALE factor
5513: rts
5514:
5515: ##########################################################################
5516:
5517: #########################################################################
5518: # XDEF **************************************************************** #
5519: # scale_to_zero_src(): scale the exponent of extended precision #
5520: # value at FP_SCR0(a6). #
5521: # #
5522: # XREF **************************************************************** #
5523: # norm() - normalize the mantissa if the operand was a DENORM #
5524: # #
5525: # INPUT *************************************************************** #
5526: # FP_SCR0(a6) = extended precision operand to be scaled #
5527: # #
5528: # OUTPUT ************************************************************** #
5529: # FP_SCR0(a6) = scaled extended precision operand #
5530: # d0 = scale value #
5531: # #
5532: # ALGORITHM *********************************************************** #
5533: # Set the exponent of the input operand to 0x3fff. Save the value #
5534: # of the difference between the original and new exponent. Then, #
5535: # normalize the operand if it was a DENORM. Add this normalization #
5536: # value to the previous value. Return the result. #
5537: # #
5538: #########################################################################
5539:
5540: global scale_to_zero_src
5541: scale_to_zero_src:
5542: mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5543: mov.w %d1,%d0 # make a copy
5544:
5545: andi.l &0x7fff,%d1 # extract operand's exponent
5546:
5547: andi.w &0x8000,%d0 # extract operand's sgn
5548: or.w &0x3fff,%d0 # insert new operand's exponent (bias; true exp = 0)
5549:
5550: mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
5551:
5552: cmpi.b STAG(%a6),&DENORM # is operand denormalized?
5553: beq.b stzs_denorm # normalize the DENORM
5554:
5555: stzs_norm:
5556: mov.l &0x3fff,%d0
5557: sub.l %d1,%d0 # scale = BIAS + (-exp)
5558:
5559: rts
5560:
5561: stzs_denorm:
5562: lea FP_SCR0(%a6),%a0 # pass ptr to src op
5563: bsr.l norm # normalize denorm
5564: neg.l %d0 # new exponent = -(shft val)
5565: mov.l %d0,%d1 # pass true exponent to stzs_norm in %d1
5566: bra.b stzs_norm # finish scaling
5567:
5568: ###
5569:
5570: #########################################################################
5571: # XDEF **************************************************************** #
5572: # scale_sqrt(): scale the input operand exponent so a subsequent #
5573: # fsqrt operation won't take an exception. #
5574: # #
5575: # XREF **************************************************************** #
5576: # norm() - normalize the mantissa if the operand was a DENORM #
5577: # #
5578: # INPUT *************************************************************** #
5579: # FP_SCR0(a6) = extended precision operand to be scaled #
5580: # #
5581: # OUTPUT ************************************************************** #
5582: # FP_SCR0(a6) = scaled extended precision operand #
5583: # d0 = scale value #
5584: # #
5585: # ALGORITHM *********************************************************** #
5586: # If the input operand is a DENORM, normalize it. #
5587: # If the exponent of the input operand is even, set the exponent #
5588: # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
5589: # exponent of the input operand is odd, set the exponent to 0x3fff and #
5590: # return a scale factor of "(exp-0x3fff)/2". #
5591: # #
5592: #########################################################################
5593:
5594: global scale_sqrt
5595: scale_sqrt:
5596: cmpi.b STAG(%a6),&DENORM # is operand denormalized?
5597: beq.b ss_denorm # normalize the DENORM
5598:
5599: mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5600: andi.l &0x7fff,%d1 # extract operand's exponent
5601:
5602: andi.w &0x8000,FP_SCR0_EX(%a6) # keep sign; clear exponent field
5603:
5604: btst &0x0,%d1 # is exp even or odd?
5605: beq.b ss_norm_even
5606:
5607: ori.w &0x3fff,FP_SCR0_EX(%a6) # odd exp: insert new exponent = 0x3fff
5608:
5609: mov.l &0x3fff,%d0
5610: sub.l %d1,%d0 # scale = BIAS + (-exp)
5611: asr.l &0x1,%d0 # divide scale factor by 2
5612: rts
5613:
5614: ss_norm_even:
5615: ori.w &0x3ffe,FP_SCR0_EX(%a6) # even exp: insert new exponent = 0x3ffe
5616:
5617: mov.l &0x3ffe,%d0
5618: sub.l %d1,%d0 # scale = BIAS + (-exp)
5619: asr.l &0x1,%d0 # divide scale factor by 2
5620: rts
5621:
5622: ss_denorm:
5623: lea FP_SCR0(%a6),%a0 # pass ptr to src op
5624: bsr.l norm # normalize denorm; %d0 = shift count
5625:
5626: btst &0x0,%d0 # is exp even or odd?
5627: beq.b ss_denorm_even
5628:
5629: ori.w &0x3fff,FP_SCR0_EX(%a6) # odd exp: insert new exponent = 0x3fff
5630:
5631: add.l &0x3fff,%d0
5632: asr.l &0x1,%d0 # divide scale factor by 2
5633: rts
5634:
5635: ss_denorm_even:
5636: ori.w &0x3ffe,FP_SCR0_EX(%a6) # even exp: insert new exponent = 0x3ffe
5637:
5638: add.l &0x3ffe,%d0
5639: asr.l &0x1,%d0 # divide scale factor by 2
5640: rts
5641:
5642: ###
5643:
5644: #########################################################################
5645: # XDEF **************************************************************** #
5646: # scale_to_zero_dst(): scale the exponent of extended precision #
5647: # value at FP_SCR1(a6). #
5648: # #
5649: # XREF **************************************************************** #
5650: # norm() - normalize the mantissa if the operand was a DENORM #
5651: # #
5652: # INPUT *************************************************************** #
5653: # FP_SCR1(a6) = extended precision operand to be scaled #
5654: # #
5655: # OUTPUT ************************************************************** #
5656: # FP_SCR1(a6) = scaled extended precision operand #
5657: # d0 = scale value #
5658: # #
5659: # ALGORITHM *********************************************************** #
5660: # Set the exponent of the input operand to 0x3fff. Save the value #
5661: # of the difference between the original and new exponent. Then, #
5662: # normalize the operand if it was a DENORM. Add this normalization #
5663: # value to the previous value. Return the result. #
5664: # #
5665: #########################################################################
5666:
5667: global scale_to_zero_dst
5668: scale_to_zero_dst:
5669: mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
5670: mov.w %d1,%d0 # make a copy
5671:
5672: andi.l &0x7fff,%d1 # extract operand's exponent
5673:
5674: andi.w &0x8000,%d0 # extract operand's sgn
5675: or.w &0x3fff,%d0 # insert new operand's exponent (bias; true exp = 0)
5676:
5677: mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
5678:
5679: cmpi.b DTAG(%a6),&DENORM # is operand denormalized?
5680: beq.b stzd_denorm # normalize the DENORM
5681:
5682: stzd_norm:
5683: mov.l &0x3fff,%d0
5684: sub.l %d1,%d0 # scale = BIAS + (-exp)
5685: rts
5686:
5687: stzd_denorm:
5688: lea FP_SCR1(%a6),%a0 # pass ptr to dst op
5689: bsr.l norm # normalize denorm
5690: neg.l %d0 # new exponent = -(shft val)
5691: mov.l %d0,%d1 # pass true exponent to stzd_norm in %d1
5692: bra.b stzd_norm # finish scaling
5693:
5694: ##########################################################################
5695:
5696: #########################################################################
5697: # XDEF **************************************************************** #
5698: # res_qnan(): return default result w/ QNAN operand for dyadic #
5699: # res_snan(): return default result w/ SNAN operand for dyadic #
5700: # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5701: # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5702: # #
5703: # XREF **************************************************************** #
5704: # None #
5705: # #
5706: # INPUT *************************************************************** #
5707: # FP_SRC(a6) = pointer to extended precision src operand #
5708: # FP_DST(a6) = pointer to extended precision dst operand #
5709: # #
5710: # OUTPUT ************************************************************** #
5711: # fp0 = default result #
5712: # #
5713: # ALGORITHM *********************************************************** #
5714: # If either operand (but not both operands) of an operation is a #
5715: # nonsignalling NAN, then that NAN is returned as the result. If both #
5716: # operands are nonsignalling NANs, then the destination operand #
5717: # nonsignalling NAN is returned as the result. #
5718: # If either operand to an operation is a signalling NAN (SNAN), #
5719: # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5720: # enable bit is set in the FPCR, then the trap is taken and the #
5721: # destination is not modified. If the SNAN trap enable bit is not set, #
5722: # then the SNAN is converted to a nonsignalling NAN (by setting the #
5723: # SNAN bit in the operand to one), and the operation continues as #
5724: # described in the preceding paragraph, for nonsignalling NANs. #
5725: # Make sure the appropriate FPSR bits are set before exiting. #
5726: # #
5727: #########################################################################
5728:
5729: global res_qnan
5730: global res_snan
5731: res_qnan:
5732: res_snan:
5733: cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
5734: beq.b dst_snan2
5735: cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
5736: beq.b dst_qnan2
5737: src_nan:
5738: cmp.b STAG(%a6), &QNAN # is the src a QNAN?
5739: beq.b src_qnan2 # yes
5740: global res_snan_1op
5741: res_snan_1op:
5742: src_snan2:
5743: bset &0x6, FP_SRC_HI(%a6) # set SNAN bit (convert to nonsignalling)
5744: or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set NAN/AIOP/SNAN
5745: lea FP_SRC(%a6), %a0 # result = src NAN
5746: bra.b nan_comp
5747: global res_qnan_1op
5748: res_qnan_1op:
5749: src_qnan2:
5750: or.l &nan_mask, USER_FPSR(%a6) # set NAN
5751: lea FP_SRC(%a6), %a0 # result = src NAN
5752: bra.b nan_comp
5753: dst_snan2:
5754: or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set NAN/AIOP/SNAN
5755: bset &0x6, FP_DST_HI(%a6) # set SNAN bit (convert to nonsignalling)
5756: lea FP_DST(%a6), %a0 # result = dst NAN
5757: bra.b nan_comp
5758: dst_qnan2:
5759: lea FP_DST(%a6), %a0 # dst QNAN takes precedence as the result
5760: cmp.b STAG(%a6), &SNAN # was the src an SNAN?
5761: bne nan_done # no; just set NAN
5762: or.l &aiop_mask+snan_mask, USER_FPSR(%a6) # yes; also set AIOP/SNAN
5763: nan_done:
5764: or.l &nan_mask, USER_FPSR(%a6) # set NAN
5765: nan_comp:
5766: btst &0x7, FTEMP_EX(%a0) # is NAN neg?
5767: beq.b nan_not_neg
5768: or.l &neg_mask, USER_FPSR(%a6) # yes; set N bit
5769: nan_not_neg:
5770: fmovm.x (%a0), &0x80 # return NAN in fp0
5771: rts
5772:
5773: #########################################################################
5774: # XDEF **************************************************************** #
5775: # res_operr(): return default result during operand error #
5776: # #
5777: # XREF **************************************************************** #
5778: # None #
5779: # #
5780: # INPUT *************************************************************** #
5781: # None #
5782: # #
5783: # OUTPUT ************************************************************** #
5784: # fp0 = default operand error result #
5785: # #
5786: # ALGORITHM *********************************************************** #
5787: # A nonsignalling NAN is returned as the default result when #
5788: # an operand error occurs for the following cases: #
5789: # #
5790: # Multiply: (Infinity x Zero) #
5791: # Divide : (Zero / Zero) || (Infinity / Infinity) #
5792: # #
5793: #########################################################################
5794:
5795: global res_operr
5796: res_operr:
5797: or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # set NAN/OPERR/AIOP
5798: fmovm.x nan_return(%pc), &0x80 # return default NAN in fp0
5799: rts
5800:
5801: nan_return:
5802: long 0x7fff0000, 0xffffffff, 0xffffffff # default nonsignalling NAN
5803:
5804: #########################################################################
5805: # XDEF **************************************************************** #
5806: # _denorm(): denormalize an intermediate result #
5807: # #
5808: # XREF **************************************************************** #
5809: # None #
5810: # #
5811: # INPUT *************************************************************** #
5812: # a0 = points to the operand to be denormalized #
5813: # (in the internal extended format) #
5814: # #
5815: # d0 = rounding precision #
5816: # #
5817: # OUTPUT ************************************************************** #
5818: # a0 = pointer to the denormalized result #
5819: # (in the internal extended format) #
5820: # #
5821: # d0 = guard,round,sticky #
5822: # #
5823: # ALGORITHM *********************************************************** #
5824: # According to the exponent underflow threshold for the given #
5825: # precision, shift the mantissa bits to the right in order to raise the #
5826: # exponent of the operand to the threshold value. While shifting the #
5827: # mantissa bits right, maintain the value of the guard, round, and #
5828: # sticky bits. #
5829: # other notes: #
5830: # (1) _denorm() is called by the underflow routines #
5831: # (2) _denorm() does NOT affect the status register #
5832: # #
5833: #########################################################################
5834:
5835: #
5836: # table of exponent threshold values for each precision
5837: #
5838: tbl_thresh: # underflow thresholds indexed by rnd precision
5839: short 0x0 # ext
5840: short sgl_thresh # sgl
5841: short dbl_thresh # dbl
5842:
5843: global _denorm
5844: _denorm:
5845: #
5846: # Load the exponent threshold for the precision selected and check
5847: # to see if (threshold - exponent) is > 65 in which case we can
5848: # simply calculate the sticky bit and zero the mantissa. otherwise
5849: # we have to call the denormalization routine.
5850: #
5851: lsr.b &0x2, %d0 # shift prec to lo bits
5852: mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5853: mov.w %d1, %d0 # copy d1 into d0
5854: sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
5855: cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
5856: bpl.b denorm_set_stky # yes; just calc sticky
5857:
5858: clr.l %d0 # clear g,r,s
5859: btst &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
5860: beq.b denorm_call # no; don't change anything
5861: bset &29, %d0 # yes; set sticky bit
5862:
5863: denorm_call:
5864: bsr.l dnrm_lp # denormalize the number
5865: rts
5866:
5867: #
5868: # all bits would have been shifted off during the denorm so simply
5869: # calculate if the sticky should be set and clear the entire mantissa.
5870: #
5871: denorm_set_stky:
5872: mov.l &0x20000000, %d0 # set sticky bit in return value
5873: mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
5874: clr.l FTEMP_HI(%a0) # zero hi(mantissa)
5875: clr.l FTEMP_LO(%a0) # zero lo(mantissa)
5876: rts
5877:
5878: # #
5879: # dnrm_lp(): normalize exponent/mantissa to specified threshold #
5880: # #
5881: # INPUT: #
5882: # %a0 : points to the operand to be denormalized #
5883: # %d0{31:29} : initial guard,round,sticky #
5884: # %d1{15:0} : denormalization threshold #
5885: # OUTPUT: #
5886: # %a0 : points to the denormalized operand #
5887: # %d0{31:29} : final guard,round,sticky #
5888: # #
5889:
5890: # *** Local Equates *** #
5891: set GRS, L_SCR2 # g,r,s temp storage
5892: set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
5893:
5894: global dnrm_lp
5895: dnrm_lp:
5896:
5897: #
5898: # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5899: # in memory so as to make the bitfield extraction for denormalization easier.
5900: #
5901: mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5902: mov.l %d0, GRS(%a6) # place g,r,s after it
5903:
5904: #
5905: # check to see how much less than the underflow threshold the operand
5906: # exponent is.
5907: #
5908: mov.l %d1, %d0 # copy the denorm threshold
5909: sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
5910: ble.b dnrm_no_lp # d1 <= 0: no denormalization needed
5911: cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
5912: blt.b case_1 # yes
5913: cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
5914: blt.b case_2 # yes
5915: bra.w case_3 # (d1 >= 64)
5916:
5917: #
5918: # No normalization necessary
5919: #
5920: dnrm_no_lp:
5921: mov.l GRS(%a6), %d0 # restore original g,r,s
5922: rts
5923:
5924: #
5925: # case (0<d1<32)
5926: #
5927: # %d0 = denorm threshold
5928: # %d1 = "n" = amt to shift
5929: #
5930: # ---------------------------------------------------------
5931: # | FTEMP_HI | FTEMP_LO |grs000.........000|
5932: # ---------------------------------------------------------
5933: # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5934: # \ \ \ \
5935: # \ \ \ \
5936: # \ \ \ \
5937: # \ \ \ \
5938: # \ \ \ \
5939: # \ \ \ \
5940: # \ \ \ \
5941: # \ \ \ \
5942: # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5943: # ---------------------------------------------------------
5944: # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
5945: # ---------------------------------------------------------
5946: #
5947: case_1:
5948: mov.l %d2, -(%sp) # create temp storage
5949:
5950: mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5951: mov.l &32, %d0
5952: sub.w %d1, %d0 # %d0 = 32 - %d1
5953:
5954: cmpi.w %d1, &29 # is shft amt >= 29
5955: blt.b case1_extract # no; no fix needed
5956: mov.b GRS(%a6), %d2 # yes; or g,r,s into FTEMP_LO copy so the
5957: or.b %d2, 3+FTEMP_LO2(%a6) # extraction below still sees those bits
5958:
5959: case1_extract:
5960: bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5961: bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5962: bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5963:
5964: mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
5965: mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
5966:
5967: bftst %d0{&2:&30} # were bits shifted off?
5968: beq.b case1_sticky_clear # no; go finish
5969: bset &rnd_stky_bit, %d0 # yes; set sticky bit
5970:
5971: case1_sticky_clear:
5972: and.l &0xe0000000, %d0 # clear all but G,R,S
5973: mov.l (%sp)+, %d2 # restore temp register
5974: rts
5975:
5976: #
5977: # case (32<=d1<64)
5978: #
5979: # %d0 = denorm threshold
5980: # %d1 = "n" = amt to shift
5981: #
5982: # ---------------------------------------------------------
5983: # | FTEMP_HI | FTEMP_LO |grs000.........000|
5984: # ---------------------------------------------------------
5985: # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5986: # \ \ \
5987: # \ \ \
5988: # \ \ -------------------
5989: # \ -------------------- \
5990: # ------------------- \ \
5991: # \ \ \
5992: # \ \ \
5993: # \ \ \
5994: # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5995: # ---------------------------------------------------------
5996: # |0...............0|0....0| NEW_LO |grs |
5997: # ---------------------------------------------------------
5998: #
5999: case_2:
6000: mov.l %d2, -(%sp) # create temp storage
6001:
6002: mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
6003: subi.w &0x20, %d1 # %d1 now between 0 and 32
6004: mov.l &0x20, %d0
6005: sub.w %d1, %d0 # %d0 = 32 - %d1
6006:
6007: # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
6008: # the number of bits to check for the sticky detect.
6009: # it only plays a role in shift amounts of 61-63.
6010: mov.b GRS(%a6), %d2
6011: or.b %d2, 3+FTEMP_LO2(%a6)
6012:
6013: bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6014: bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6015:
6016: bftst %d1{&2:&30} # were any bits shifted off?
6017: bne.b case2_set_sticky # yes; set sticky bit
6018: bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
6019: bne.b case2_set_sticky # yes; set sticky bit
6020:
6021: mov.l %d1, %d0 # move new G,R,S to %d0
6022: bra.b case2_end
6023:
6024: case2_set_sticky:
6025: mov.l %d1, %d0 # move new G,R,S to %d0
6026: bset &rnd_stky_bit, %d0 # set sticky bit
6027:
6028: case2_end:
6029: clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
6030: mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
6031: and.l &0xe0000000, %d0 # clear all but G,R,S
6032:
6033: mov.l (%sp)+,%d2 # restore temp register
6034: rts
6035:
6036: #
6037: # case (d1>=64)
6038: #
6039: # %d0 = denorm threshold
6040: # %d1 = amt to shift
6041: #
6042: case_3:
6043: mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
6044:
6045: cmpi.w %d1, &65 # is shift amt > 65?
6046: blt.b case3_64 # no; it's == 64
6047: beq.b case3_65 # no; it's == 65
6048:
6049: #
6050: # case (d1>65)
6051: #
6052: # Shift value is > 65 and out of range. All bits are shifted off.
6053: # Return a zero mantissa with the sticky bit set
6054: #
6055: clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6056: clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6057: mov.l &0x20000000, %d0 # set sticky bit
6058: rts
6059:
6060: #
6061: # case (d1 == 64)
6062: #
6063: # ---------------------------------------------------------
6064: # | FTEMP_HI | FTEMP_LO |grs000.........000|
6065: # ---------------------------------------------------------
6066: # <-------(32)------>
6067: # \ \
6068: # \ \
6069: # \ \
6070: # \ ------------------------------
6071: # ------------------------------- \
6072: # \ \
6073: # \ \
6074: # \ \
6075: # <-------(32)------>
6076: # ---------------------------------------------------------
6077: # |0...............0|0................0|grs |
6078: # ---------------------------------------------------------
6079: #
6080: case3_64:
6081: mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6082: mov.l %d0, %d1 # make a copy for shifted-off detect
6083: and.l &0xc0000000, %d0 # extract G,R
6084: and.l &0x3fffffff, %d1 # extract other (shifted-off) bits
6085:
6086: bra.b case3_complete
6087:
6088: #
6089: # case (d1 == 65)
6090: #
6091: # ---------------------------------------------------------
6092: # | FTEMP_HI | FTEMP_LO |grs000.........000|
6093: # ---------------------------------------------------------
6094: # <-------(32)------>
6095: # \ \
6096: # \ \
6097: # \ \
6098: # \ ------------------------------
6099: # -------------------------------- \
6100: # \ \
6101: # \ \
6102: # \ \
6103: # <-------(31)----->
6104: # ---------------------------------------------------------
6105: # |0...............0|0................0|0rs |
6106: # ---------------------------------------------------------
6107: #
6108: case3_65:
6109: mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6110: and.l &0x80000000, %d0 # extract R bit
6111: lsr.l &0x1, %d0 # shift high bit into R bit
6112: and.l &0x7fffffff, %d1 # extract other bits -- NOTE(review): unlike case3_64, %d1 was never reloaded with hi(mantissa); it still holds the shift count (65), so this path appears to always set sticky below. Looks like a missing "mov.l FTEMP_HI(%a0), %d1" -- verify against 060SP sources/errata before changing.
6113:
6114: case3_complete:
6115: # last operation done was an "and" of the bits shifted off so the condition
6116: # codes are already set so branch accordingly.
6117: bne.b case3_set_sticky # yes; go set new sticky
6118: tst.l FTEMP_LO(%a0) # were any bits shifted off?
6119: bne.b case3_set_sticky # yes; go set new sticky
6120: tst.b GRS(%a6) # were any bits shifted off?
6121: bne.b case3_set_sticky # yes; go set new sticky
6122:
6123: #
6124: # no bits were shifted off so don't set the sticky bit;
6125: # the guard and round bits already in %d0 are returned and
6126: # the entire mantissa is zero.
6127: #
6128: clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6129: clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6130: rts
6131:
6132: #
6133: # some bits were shifted off so set the sticky bit.
6134: # the entire mantissa is zero.
6135: #
6136: case3_set_sticky:
6137: bset &rnd_stky_bit,%d0 # set new sticky bit
6138: clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6139: clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6140: rts
6141:
6142: #########################################################################
6143: # XDEF **************************************************************** #
6144: # _round(): round result according to precision/mode #
6145: # #
6146: # XREF **************************************************************** #
6147: # None #
6148: # #
6149: # INPUT *************************************************************** #
6150: # a0 = ptr to input operand in internal extended format #
6151: # d1(hi) = contains rounding precision: #
6152: # ext = $0000xxxx #
6153: # sgl = $0004xxxx #
6154: # dbl = $0008xxxx #
6155: # d1(lo) = contains rounding mode: #
6156: # RN = $xxxx0000 #
6157: # RZ = $xxxx0001 #
6158: # RM = $xxxx0002 #
6159: # RP = $xxxx0003 #
6160: # d0{31:29} = contains the g,r,s bits (extended) #
6161: # #
6162: # OUTPUT ************************************************************** #
6163: # a0 = pointer to rounded result #
6164: # #
6165: # ALGORITHM *********************************************************** #
6166: # On return the value pointed to by a0 is correctly rounded, #
6167: # a0 is preserved and the g-r-s bits in d0 are cleared. #
6168: # The result is not typed - the tag field is invalid. The #
6169: # result is still in the internal extended format. #
6170: # #
6171: # The INEX bit of USER_FPSR will be set if the rounded result was #
6172: # inexact (i.e. if any of the g-r-s bits were set). #
6173: # #
6174: #########################################################################
6175:
6176: global _round
6177: _round:
6178: #
6179: # ext_grs() looks at the rounding precision and sets the appropriate
6180: # G,R,S bits.
6181: # If (G,R,S == 0) then result is exact and round is done, else set
6182: # the inex flag in status reg and continue.
6183: #
6184: bsr.l ext_grs # extract G,R,S
6185:
6186: tst.l %d0 # are G,R,S zero?
6187: beq.w truncate # yes; round is complete
6188:
6189: or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6190:
6191: #
6192: # Use rounding mode as an index into a jump table for these modes.
6193: # All of the following assumes grs != 0.
6194: #
6195: mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6196: jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
6197:
6198: tbl_mode:
6199: short rnd_near - tbl_mode # RN
6200: short truncate - tbl_mode # RZ always truncates
6201: short rnd_mnus - tbl_mode # RM
6202: short rnd_plus - tbl_mode # RP
6203:
6204: #################################################################
6205: # ROUND PLUS INFINITY #
6206: # #
6207: # If sign of fp number = 0 (positive), then add 1 to l. #
6208: #################################################################
6209: rnd_plus:
6210: tst.b FTEMP_SGN(%a0) # check for sign
6211: bmi.w truncate # if negative then truncate
6212:
6213: mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6214: swap %d1 # set up d1 for round prec.
6215:
6216: cmpi.b %d1, &s_mode # is prec = sgl?
6217: beq.w add_sgl # yes
6218: bgt.w add_dbl # no; it's dbl
6219: bra.w add_ext # no; it's ext
6220:
6221: #################################################################
6222: # ROUND MINUS INFINITY #
6223: # #
6224: # If sign of fp number = 1 (negative), then add 1 to l. #
6225: #################################################################
6226: rnd_mnus:
6227: tst.b FTEMP_SGN(%a0) # check for sign
6228: bpl.w truncate # if positive then truncate
6229:
6230: mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6231: swap %d1 # set up d1 for round prec.
6232:
6233: cmpi.b %d1, &s_mode # is prec = sgl?
6234: beq.w add_sgl # yes
6235: bgt.w add_dbl # no; it's dbl
6236: bra.w add_ext # no; it's ext
6237:
6238: #################################################################
6239: # ROUND NEAREST #
6240: # #
6241: # If (g=1), then add 1 to l and if (r=s=0), then clear l #
6242: # Note that this will round to even in case of a tie. #
6243: #################################################################
6244: rnd_near:
6245: asl.l &0x1, %d0 # shift g-bit to c-bit
6246: bcc.w truncate # if (g=0) then round is done; truncate
6247:
6248: # g=1: d0 now holds r,s in its upper bits; the add_* routines test
6249: # d0 and clear the l-bit when r=s=0 (the round-to-even tie case).
6250: swap %d1 # set up d1 for round prec.
6251:
6252: cmpi.b %d1, &s_mode # is prec = sgl?
6253: beq.w add_sgl # yes
6254: bgt.w add_dbl # no; it's dbl
6255: bra.w add_ext # no; it's ext
6254:
6255: # *** LOCAL EQUATES ***
6256: set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
6257: set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
6258:
6259: #########################
6260: # ADD SINGLE #
6261: #########################
6262: # add 1 to the l-bit of a sgl-prec mantissa; on carry-out the mantissa
6263: # overflowed to zero, so rotate the carry back in and bump the exponent.
6264: add_sgl:
6265: add.l &ad_1_sgl, FTEMP_HI(%a0)
6266: bcc.b scc_clr # no mantissa overflow
6267: roxr.w FTEMP_HI(%a0) # shift v-bit back in
6268: roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
6269: add.w &0x1, FTEMP_EX(%a0) # and incr exponent
6270: scc_clr:
6271: tst.l %d0 # test for rs = 0
6272: bne.b sgl_done # r or s set; not a tie, keep l-bit
6273: and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit (round to even)
6274: sgl_done:
6275: and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6276: clr.l FTEMP_LO(%a0) # lower mantissa is zero in sgl prec
6277: rts
6276:
6277: #########################
6278: # ADD EXTENDED #
6279: #########################
6280: # add 1 to the l-bit of an ext-prec mantissa, propagating the carry
6281: # from lo(man) into hi(man); on full overflow, rotate the carry back
6282: # into the mantissa MSB and bump the exponent.
6283: add_ext:
6284: addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
6285: bcc.b xcc_clr # test for carry out
6286: addq.l &1,FTEMP_HI(%a0) # propagate carry
6287: bcc.b xcc_clr
6288: roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6289: roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6290: roxr.w FTEMP_LO(%a0)
6291: roxr.w FTEMP_LO+2(%a0)
6292: add.w &0x1,FTEMP_EX(%a0) # and inc exp
6293: xcc_clr:
6294: tst.l %d0 # test rs = 0
6295: bne.b add_ext_done # r or s set; not a tie, keep l-bit
6296: and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit (round to even)
6297: add_ext_done:
6298: rts
6296:
6297: #########################
6298: # ADD DOUBLE #
6299: #########################
6300: # add 1 to the l-bit of a dbl-prec mantissa, propagating the carry
6301: # into hi(man); on full overflow, rotate the carry back into the
6302: # mantissa MSB and bump the exponent.
6303: add_dbl:
6304: add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6305: bcc.b dcc_clr # no carry
6306: addq.l &0x1, FTEMP_HI(%a0) # propagate carry
6307: bcc.b dcc_clr # no carry
6308:
6309: roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6310: roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6311: roxr.w FTEMP_LO(%a0)
6312: roxr.w FTEMP_LO+2(%a0)
6313: addq.w &0x1, FTEMP_EX(%a0) # incr exponent
6314: dcc_clr:
6315: tst.l %d0 # test for rs = 0
6316: bne.b dbl_done # r or s set; not a tie, keep l-bit
6317: and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit (round to even)
6318:
6319: dbl_done:
6320: and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6321: rts
6319:
6320: ###########################
6321: # Truncate all other bits #
6322: ###########################
6323: # chop the mantissa to the rounding precision; ext prec keeps all
6324: # mantissa bits, so it just returns.
6325: truncate:
6326: swap %d1 # select rnd prec
6327:
6328: cmpi.b %d1, &s_mode # is prec sgl?
6329: beq.w sgl_done # yes
6330: bgt.b dbl_done # no; it's dbl
6331: rts # no; it's ext - nothing to chop
6330:
6331:
6332: #
6333: # ext_grs(): extract guard, round and sticky bits according to
6334: # rounding precision.
6335: #
6336: # INPUT
6337: # d0 = extended precision g,r,s (in d0{31:29})
6338: # d1 = {PREC,ROUND}
6339: # OUTPUT
6340: # d0{31:29} = guard, round, sticky
6341: #
6342: # The ext_grs extract the guard/round/sticky bits according to the
6343: # selected rounding precision. It is called by the round subroutine
6344: # only. All registers except d0 are kept intact. d0 becomes an
6345: # updated guard,round,sticky in d0{31:29}
6346: #
6347: # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6348: # prior to usage, and needs to restore d1 to original. this
6349: # routine is tightly tied to the round routine and not meant to
6350: # uphold standard subroutine calling practices.
6351: #
6352:
6353: ext_grs:
6354: swap %d1 # have d1.w point to round precision
6355: tst.b %d1 # is rnd prec = extended?
6356: bne.b ext_grs_not_ext # no; go handle sgl or dbl
6357:
6358: #
6359: # %d0 actually already hold g,r,s since _round() had it before calling
6360: # this function. so, as long as we don't disturb it, we are "returning" it.
6361: #
6362: ext_grs_ext:
6363: swap %d1 # yes; return to correct positions
6364: rts
6365:
6366: ext_grs_not_ext:
6367: movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
6368:
6369: cmpi.b %d1, &s_mode # is rnd prec = sgl?
6370: bne.b ext_grs_dbl # no; go handle dbl
6371:
6372: #
6373: # sgl:
6374: # 96 64 40 32 0
6375: # -----------------------------------------------------
6376: # | EXP |XXXXXXX| |xx | |grs|
6377: # -----------------------------------------------------
6378: # <--(24)--->nn\ /
6379: # ee ---------------------
6380: # ww |
6381: # v
6382: # gr new sticky
6383: #
6384: # sgl mantissa is 24 bits, so g-r sit just below hi(man) bit 8;
6385: # sticky is the OR of everything to their right (rest of hi(man),
6386: # all of lo(man), and the original ext-prec g,r,s in d0).
6387: ext_grs_sgl:
6388: bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6389: mov.l &30, %d2 # of the sgl prec. limits
6390: lsl.l %d2, %d3 # shift g-r bits to MSB of d3
6391: mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
6392: and.l &0x0000003f, %d2 # s bit is the or of all other
6393: bne.b ext_grs_st_stky # bits to the right of g-r
6394: tst.l FTEMP_LO(%a0) # test lower mantissa
6395: bne.b ext_grs_st_stky # if any are set, set sticky
6396: tst.l %d0 # test original g,r,s
6397: bne.b ext_grs_st_stky # if any are set, set sticky
6398: bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
6399:
6400: #
6401: # dbl:
6402: # 96 64 32 11 0
6403: # -----------------------------------------------------
6404: # | EXP |XXXXXXX| | |xx |grs|
6405: # -----------------------------------------------------
6406: # nn\ /
6407: # ee -------
6408: # ww |
6409: # v
6410: # gr new sticky
6411: #
6412: # dbl mantissa is 53 bits, so g-r sit just below lo(man) bit 11;
6413: # sticky is the OR of everything to their right plus the original
6414: # ext-prec g,r,s in d0.
6415: ext_grs_dbl:
6416: bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6417: mov.l &30, %d2 # of the dbl prec. limits
6418: lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
6419: mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
6420: and.l &0x000001ff, %d2 # s bit is the or-ing of all
6421: bne.b ext_grs_st_stky # other bits to the right of g-r
6422: tst.l %d0 # test word original g,r,s
6423: bne.b ext_grs_st_stky # if any are set, set sticky
6424: bra.b ext_grs_end_sd # if clear, exit
6425:
6426: ext_grs_st_stky:
6427: bset &rnd_stky_bit, %d3 # set sticky bit
6428: ext_grs_end_sd:
6429: mov.l %d3, %d0 # return grs to d0
6430:
6431: movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
6432:
6433: swap %d1 # restore d1 to original
6434: rts
6429:
6430: #########################################################################
6431: # norm(): normalize the mantissa of an extended precision input. the #
6432: # input operand should not be normalized already. #
6433: # #
6434: # XDEF **************************************************************** #
6435: # norm() #
6436: # #
6437: # XREF **************************************************************** #
6438: # none #
6439: # #
6440: # INPUT *************************************************************** #
6441: # a0 = pointer fp extended precision operand to normalize #
6442: # #
6443: # OUTPUT ************************************************************** #
6444: # d0 = number of bit positions the mantissa was shifted #
6445: # a0 = the input operand's mantissa is normalized; the exponent #
6446: # is unchanged. #
6447: # #
6448: # NOTE: %d1 is clobbered (it holds lo(man) during the shift). #
6449: # #
6450: #########################################################################
6451: global norm
6452: norm:
6453: mov.l %d2, -(%sp) # create some temp regs
6454: mov.l %d3, -(%sp)
6455:
6456: mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
6457: mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
6458:
6459: bfffo %d0{&0:&32}, %d2 # how many places to shift?
6460: beq.b norm_lo # hi(man) is all zeroes!
6461:
6462: # shift both longwords left by %d2, carrying the top %d2 bits of
6463: # lo(man) into the bottom of hi(man).
6464: norm_hi:
6465: lsl.l %d2, %d0 # left shift hi(man)
6466: bfextu %d1{&0:%d2}, %d3 # extract lo bits
6467:
6468: or.l %d3, %d0 # create hi(man)
6469: lsl.l %d2, %d1 # create lo(man)
6470:
6471: mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6472: mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
6473:
6474: mov.l %d2, %d0 # return shift amount
6475:
6476: mov.l (%sp)+, %d3 # restore temp regs
6477: mov.l (%sp)+, %d2
6478:
6479: rts
6480:
6481: # hi(man) was zero: the normalized hi(man) comes entirely from
6482: # lo(man), and lo(man) becomes zero. total shift = 32 + bfffo count.
6483: norm_lo:
6484: bfffo %d1{&0:&32}, %d2 # how many places to shift?
6485: lsl.l %d2, %d1 # shift lo(man)
6486: add.l &32, %d2 # add 32 to shft amount
6487:
6488: mov.l %d1, FTEMP_HI(%a0) # store hi(man)
6489: clr.l FTEMP_LO(%a0) # lo(man) is now zero
6490:
6491: mov.l %d2, %d0 # return shift amount
6492:
6493: mov.l (%sp)+, %d3 # restore temp regs
6494: mov.l (%sp)+, %d2
6495:
6496: rts
6491:
6492: #########################################################################
6493: # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6494: # - returns corresponding optype tag #
6495: # #
6496: # XDEF **************************************************************** #
6497: # unnorm_fix() #
6498: # #
6499: # XREF **************************************************************** #
6500: # norm() - normalize the mantissa #
6501: # #
6502: # INPUT *************************************************************** #
6503: # a0 = pointer to unnormalized extended precision number #
6504: # #
6505: # OUTPUT ************************************************************** #
6506: # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6507: # a0 = input operand has been converted to a norm, denorm, or #
6508: # zero; both the exponent and mantissa are changed. #
6509: # #
6510: #########################################################################
6511:
6512: global unnorm_fix
6513: unnorm_fix:
6514: bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6515: bne.b unnorm_shift # hi(man) is not all zeroes
6516:
6517: #
6518: # hi(man) is all zeroes so see if any bits in lo(man) are set
6519: #
6520: unnorm_chk_lo:
6521: bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6522: beq.w unnorm_zero # yes
6523:
6524: add.w &32, %d0 # no; fix shift distance
6525:
6526: #
6527: # d0 = # shifts needed for complete normalization
6528: #
6529: unnorm_shift:
6530: clr.l %d1 # clear top word
6531: mov.w FTEMP_EX(%a0), %d1 # extract exponent
6532: and.w &0x7fff, %d1 # strip off sgn
6533:
6534: cmp.w %d0, %d1 # will denorm push exp < 0?
6535: bgt.b unnorm_nrm_zero # taken when shift count > exp
6536:
6537: #
6538: # exponent would not go < 0. therefore, number stays normalized
6539: #
6540: sub.w %d0, %d1 # shift exponent value
6541: mov.w FTEMP_EX(%a0), %d0 # load old exponent
6542: and.w &0x8000, %d0 # save old sign
6543: or.w %d0, %d1 # {sgn,new exp}
6544: mov.w %d1, FTEMP_EX(%a0) # insert new exponent
6545:
6546: bsr.l norm # normalize UNNORM
6547:
6548: mov.b &NORM, %d0 # return new optype tag
6549: rts
6550:
6551: #
6552: # exponent would go < 0, so only denormalize until exp = 0
6553: #
6554: unnorm_nrm_zero:
6555: cmp.b %d1, &32 # is exp <= 32?
6556: bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
6557:
6558: # exp <= 32: shift the 64-bit mantissa left by exp places so the
6559: # exponent can be forced to zero (result is a DENORM).
6560: bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6561: mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
6562:
6563: mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6564: lsl.l %d1, %d0 # extract new lo(man)
6565: mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
6566:
6567: and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6568:
6569: mov.b &DENORM, %d0 # return new optype tag
6570: rts
6571:
6572: #
6573: # only mantissa bits set are in lo(man)
6574: #
6575: unnorm_nrm_zero_lrg:
6576: sub.w &32, %d1 # adjust shft amt by 32
6577:
6578: mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6579: lsl.l %d1, %d0 # left shift lo(man)
6580:
6581: mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6582: clr.l FTEMP_LO(%a0) # lo(man) = 0
6583:
6584: and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6585:
6586: mov.b &DENORM, %d0 # return new optype tag
6587: rts
6588:
6589: #
6590: # whole mantissa is zero so this UNNORM is actually a zero
6591: #
6592: unnorm_zero:
6593: and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
6594:
6595: mov.b &ZERO, %d0 # fix optype tag
6596: rts
6595:
6596: #########################################################################
6597: # XDEF **************************************************************** #
6598: # set_tag_x(): return the optype of the input ext fp number #
6599: # #
6600: # XREF **************************************************************** #
6601: # None #
6602: # #
6603: # INPUT *************************************************************** #
6604: # a0 = pointer to extended precision operand #
6605: # #
6606: # OUTPUT ************************************************************** #
6607: # d0 = value of type tag #
6608: # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6609: # #
6610: # ALGORITHM *********************************************************** #
6611: # Simply test the exponent, j-bit, and mantissa values to #
6612: # determine the type of operand. #
6613: # If it's an unnormalized zero, alter the operand and force it #
6614: # to be a normal zero. #
6615: # #
6616: #########################################################################
6617:
6618: global set_tag_x
6619: set_tag_x:
6620: mov.w FTEMP_EX(%a0), %d0 # extract exponent
6621: andi.w &0x7fff, %d0 # strip off sign
6622: cmpi.w %d0, &0x7fff # is (EXP == MAX)?
6623: beq.b inf_or_nan_x
6624: not_inf_or_nan_x:
6625: btst &0x7,FTEMP_HI(%a0) # test j-bit (explicit integer bit)
6626: beq.b not_norm_x
6627: is_norm_x:
6628: mov.b &NORM, %d0
6629: rts
6630: not_norm_x:
6631: tst.w %d0 # is exponent = 0?
6632: bne.b is_unnorm_x
6633: not_unnorm_x:
6634: tst.l FTEMP_HI(%a0)
6635: bne.b is_denorm_x
6636: tst.l FTEMP_LO(%a0)
6637: bne.b is_denorm_x
6638: is_zero_x:
6639: mov.b &ZERO, %d0
6640: rts
6641: is_denorm_x:
6642: mov.b &DENORM, %d0
6643: rts
6644: # must distinguish now "Unnormalized zeroes" which we
6645: # must convert to zero.
6646: is_unnorm_x:
6647: tst.l FTEMP_HI(%a0)
6648: bne.b is_unnorm_reg_x
6649: tst.l FTEMP_LO(%a0)
6650: bne.b is_unnorm_reg_x
6651: # it's an "unnormalized zero". let's convert it to an actual zero...
6652: andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
6653: mov.b &ZERO, %d0
6654: rts
6655: is_unnorm_reg_x:
6656: mov.b &UNNORM, %d0
6657: rts
6658: inf_or_nan_x:
6659: tst.l FTEMP_LO(%a0) # any mantissa bits set => NAN
6660: bne.b is_nan_x
6661: mov.l FTEMP_HI(%a0), %d0
6662: and.l &0x7fffffff, %d0 # msb is a don't care!
6663: bne.b is_nan_x
6664: is_inf_x:
6665: mov.b &INF, %d0
6666: rts
6667: is_nan_x:
6668: btst &0x6, FTEMP_HI(%a0) # quiet bit set => QNAN, clear => SNAN
6669: beq.b is_snan_x
6670: mov.b &QNAN, %d0
6671: rts
6672: is_snan_x:
6673: mov.b &SNAN, %d0
6674: rts
6675:
6676: #########################################################################
6677: # XDEF **************************************************************** #
6678: # set_tag_d(): return the optype of the input dbl fp number #
6679: # #
6680: # XREF **************************************************************** #
6681: # None #
6682: # #
6683: # INPUT *************************************************************** #
6684: # a0 = points to double precision operand #
6685: # #
6686: # OUTPUT ************************************************************** #
6687: # d0 = value of type tag #
6688: # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6689: # #
6690: # ALGORITHM *********************************************************** #
6691: # Simply test the exponent, j-bit, and mantissa values to #
6692: # determine the type of operand. #
6693: # #
6694: #########################################################################
6695:
6696: global set_tag_d
6697: set_tag_d:
6698: mov.l FTEMP(%a0), %d0 # hi longword: sign,exp,hi(frac)
6699: mov.l %d0, %d1 # keep a copy for fraction tests
6700:
6701: andi.l &0x7ff00000, %d0 # isolate 11-bit exponent
6702: beq.b zero_or_denorm_d # exp = 0
6703:
6704: cmpi.l %d0, &0x7ff00000 # exp = max?
6705: beq.b inf_or_nan_d
6706:
6707: is_norm_d:
6708: mov.b &NORM, %d0
6709: rts
6710: zero_or_denorm_d:
6711: and.l &0x000fffff, %d1 # any hi fraction bits?
6712: bne is_denorm_d
6713: tst.l 4+FTEMP(%a0) # any lo fraction bits?
6714: bne is_denorm_d
6715: is_zero_d:
6716: mov.b &ZERO, %d0
6717: rts
6718: is_denorm_d:
6719: mov.b &DENORM, %d0
6720: rts
6721: inf_or_nan_d:
6722: and.l &0x000fffff, %d1 # any fraction bits => NAN
6723: bne is_nan_d
6724: tst.l 4+FTEMP(%a0)
6725: bne is_nan_d
6726: is_inf_d:
6727: mov.b &INF, %d0
6728: rts
6729: is_nan_d:
6730: btst &19, %d1 # quiet bit set => QNAN
6731: bne is_qnan_d
6732: is_snan_d:
6733: mov.b &SNAN, %d0
6734: rts
6735: is_qnan_d:
6736: mov.b &QNAN, %d0
6737: rts
6738:
6739: #########################################################################
6740: # XDEF **************************************************************** #
6741: # set_tag_s(): return the optype of the input sgl fp number #
6742: # #
6743: # XREF **************************************************************** #
6744: # None #
6745: # #
6746: # INPUT *************************************************************** #
6747: # a0 = pointer to single precision operand #
6748: # #
6749: # OUTPUT ************************************************************** #
6750: # d0 = value of type tag #
6751: # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6752: # #
6753: # ALGORITHM *********************************************************** #
6754: # Simply test the exponent, j-bit, and mantissa values to #
6755: # determine the type of operand. #
6756: # #
6757: #########################################################################
6758:
6759: global set_tag_s
6760: set_tag_s:
6761: mov.l FTEMP(%a0), %d0 # sign,exp,frac in one longword
6762: mov.l %d0, %d1 # keep a copy for fraction tests
6763:
6764: andi.l &0x7f800000, %d0 # isolate 8-bit exponent
6765: beq.b zero_or_denorm_s # exp = 0
6766:
6767: cmpi.l %d0, &0x7f800000 # exp = max?
6768: beq.b inf_or_nan_s
6769:
6770: is_norm_s:
6771: mov.b &NORM, %d0
6772: rts
6773: zero_or_denorm_s:
6774: and.l &0x007fffff, %d1 # any fraction bits?
6775: bne is_denorm_s
6776: is_zero_s:
6777: mov.b &ZERO, %d0
6778: rts
6779: is_denorm_s:
6780: mov.b &DENORM, %d0
6781: rts
6782: inf_or_nan_s:
6783: and.l &0x007fffff, %d1 # any fraction bits => NAN
6784: bne is_nan_s
6785: is_inf_s:
6786: mov.b &INF, %d0
6787: rts
6788: is_nan_s:
6789: btst &22, %d1 # quiet bit set => QNAN
6790: bne is_qnan_s
6791: is_snan_s:
6792: mov.b &SNAN, %d0
6793: rts
6794: is_qnan_s:
6795: mov.b &QNAN, %d0
6796: rts
6797:
6798: #########################################################################
6799: # XDEF **************************************************************** #
6800: # unf_res(): routine to produce default underflow result of a #
6801: # scaled extended precision number; this is used by #
6802: # fadd/fdiv/fmul/etc. emulation routines. #
6803: # unf_res4(): same as above but for fsglmul/fsgldiv which use #
6804: # single round prec and extended prec mode. #
6805: # #
6806: # XREF **************************************************************** #
6807: # _denorm() - denormalize according to scale factor #
6808: # _round() - round denormalized number according to rnd prec #
6809: # #
6810: # INPUT *************************************************************** #
6811: # a0 = pointer to extended precision operand #
6812: # d0 = scale factor #
6813: # d1 = rounding precision/mode #
6814: # #
6815: # OUTPUT ************************************************************** #
6816: # a0 = pointer to default underflow result in extended precision #
6817: # d0.b = result FPSR_cc which caller may or may not want to save #
6818: # #
6819: # ALGORITHM *********************************************************** #
6820: # Convert the input operand to "internal format" which means the #
6821: # exponent is extended to 16 bits and the sign is stored in the unused #
6822: # portion of the extended precision operand. Denormalize the number #
6823: # according to the scale factor passed in d0. Then, round the #
6824: # denormalized result. #
6825: # Set the FPSR_exc bits as appropriate but return the cc bits in #
6826: # d0 in case the caller doesn't want to save them (as is the case for #
6827: # fmove out). #
6828: # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6829: # precision and the rounding mode to single. #
6830: # #
6831: #########################################################################
6832: global unf_res
6833: unf_res:
6834: mov.l %d1, -(%sp) # save rnd prec,mode on stack
6835:
6836: btst &0x7, FTEMP_EX(%a6) # make "internal" format
6837: sne FTEMP_SGN(%a0)
6838:
6839: mov.w FTEMP_EX(%a0), %d1 # extract exponent
6840: and.w &0x7fff, %d1
6841: sub.w %d0, %d1
6842: mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
6843:
6844: mov.l %a0, -(%sp) # save operand ptr during calls
6845:
6846: # stack is now: 0x0(%sp) = a0, 0x4(%sp) = saved d1 (prec/mode)
6847: mov.l 0x4(%sp),%d0 # pass rnd prec.
6848: andi.w &0x00c0,%d0 # isolate prec field
6849: lsr.w &0x4,%d0
6850: bsr.l _denorm # denorm result
6851:
6852: mov.l (%sp),%a0
6853: mov.w 0x6(%sp),%d1 # load prec:mode into %d1
6854: andi.w &0xc0,%d1 # extract rnd prec
6855: lsr.w &0x4,%d1
6856: swap %d1 # prec in hi word for _round
6857: mov.w 0x6(%sp),%d1
6858: andi.w &0x30,%d1 # extract rnd mode into lo word
6859: lsr.w &0x4,%d1
6860: bsr.l _round # round the denorm
6861:
6862: mov.l (%sp)+, %a0
6863:
6864: # result is now rounded properly. convert back to normal format
6865: bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
6866: tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6867: beq.b unf_res_chkifzero # no; result is positive
6868: bset &0x7, FTEMP_EX(%a0) # set result sgn
6869: clr.b FTEMP_SGN(%a0) # clear temp sign
6870:
6871: # the number may have become zero after rounding. set ccodes accordingly.
6872: unf_res_chkifzero:
6873: clr.l %d0
6874: tst.l FTEMP_HI(%a0) # is value now a zero?
6875: bne.b unf_res_cont # no
6876: tst.l FTEMP_LO(%a0)
6877: bne.b unf_res_cont # no
6878: # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
6879: bset &z_bit, %d0 # yes; set zero ccode bit
6880:
6881: unf_res_cont:
6882:
6883: #
6884: # can inex1 also be set along with unfl and inex2???
6885: #
6886: # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6887: #
6888: btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6889: beq.b unf_res_end # no
6890: bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6891:
6892: unf_res_end:
6893: add.l &0x4, %sp # clear stack
6894: rts
6894:
6895: # unf_res() for fsglmul() and fsgldiv().
6896: # identical flow to unf_res() except: _denorm is forced to extended
6897: # precision, and _round is forced to single precision (the rnd mode is
6898: # still taken from the caller's saved d1).
6899: global unf_res4
6900: unf_res4:
6901: mov.l %d1,-(%sp) # save rnd prec,mode on stack
6902:
6903: btst &0x7,FTEMP_EX(%a0) # make "internal" format
6904: sne FTEMP_SGN(%a0)
6905:
6906: mov.w FTEMP_EX(%a0),%d1 # extract exponent
6907: and.w &0x7fff,%d1
6908: sub.w %d0,%d1
6909: mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
6910:
6911: mov.l %a0,-(%sp) # save operand ptr during calls
6912:
6913: clr.l %d0 # force rnd prec = ext
6914: bsr.l _denorm # denorm result
6915:
6916: mov.l (%sp),%a0
6917: mov.w &s_mode,%d1 # force rnd prec = sgl
6918: swap %d1
6919: mov.w 0x6(%sp),%d1 # load rnd mode
6920: andi.w &0x30,%d1 # extract rnd prec
6921: lsr.w &0x4,%d1
6922: bsr.l _round # round the denorm
6923:
6924: mov.l (%sp)+,%a0
6925:
6926: # result is now rounded properly. convert back to normal format
6927: bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
6928: tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6929: beq.b unf_res4_chkifzero # no; result is positive
6930: bset &0x7,FTEMP_EX(%a0) # set result sgn
6931: clr.b FTEMP_SGN(%a0) # clear temp sign
6932:
6933: # the number may have become zero after rounding. set ccodes accordingly.
6934: unf_res4_chkifzero:
6935: clr.l %d0
6936: tst.l FTEMP_HI(%a0) # is value now a zero?
6937: bne.b unf_res4_cont # no
6938: tst.l FTEMP_LO(%a0)
6939: bne.b unf_res4_cont # no
6940: # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
6941: bset &z_bit,%d0 # yes; set zero ccode bit
6942:
6943: unf_res4_cont:
6944:
6945: #
6946: # can inex1 also be set along with unfl and inex2???
6947: #
6948: # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6949: #
6950: btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6951: beq.b unf_res4_end # no
6952: bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6953:
6954: unf_res4_end:
6955: add.l &0x4,%sp # clear stack
6956: rts
6954:
6955: #########################################################################
6956: # XDEF **************************************************************** #
6957: # ovf_res(): routine to produce the default overflow result of #
6958: # an overflowing number. #
6959: # ovf_res2(): same as above but the rnd mode/prec are passed #
6960: # differently. #
6961: # #
6962: # XREF **************************************************************** #
6963: # none #
6964: # #
6965: # INPUT *************************************************************** #
6966: # d1.b = '-1' => (-); '0' => (+) #
6967: # ovf_res(): #
6968: # d0 = rnd mode/prec #
6969: # ovf_res2(): #
6970: # hi(d0) = rnd prec #
6971: # lo(d0) = rnd mode #
6972: # #
6973: # OUTPUT ************************************************************** #
6974: # a0 = points to extended precision result #
6975: # d0.b = condition code bits #
6976: # #
6977: # ALGORITHM *********************************************************** #
6978: # The default overflow result can be determined by the sign of #
6979: # the result and the rounding mode/prec in effect. These bits are #
6980: # concatenated together to create an index into the default result #
6981: # table. A pointer to the correct result is returned in a0. The #
6982: # resulting condition codes are returned in d0 in case the caller #
6983: # doesn't want FPSR_cc altered (as is the case for fmove out). #
6984: # #
6985: #########################################################################
6986:
6987: global ovf_res
6988: ovf_res:
6989: andi.w &0x10,%d1 # keep result sign (bit 4 of index)
6990: lsr.b &0x4,%d0 # shift prec/mode
6991: or.b %d0,%d1 # concat the two
6992: mov.w %d1,%d0 # make a copy
6993: lsl.b &0x1,%d1 # multiply d1 by 2
6994: bra.b ovf_res_load
6995:
6996: global ovf_res2
6997: ovf_res2:
6998: and.w &0x10, %d1 # keep result sign
6999: or.b %d0, %d1 # insert rnd mode
7000: swap %d0
7001: or.b %d0, %d1 # insert rnd prec
7002: mov.w %d1, %d0 # make a copy
7003: lsl.b &0x1, %d1 # shift left by 1
7004:
7005: #
7006: # use the rounding mode, precision, and result sign as in index into the
7007: # two tables below to fetch the default result and the result ccodes.
7008: # index = {sign,prec,mode}; d0 indexes the byte-wide cc table directly,
7009: # d1 was doubled so that d1*8 steps in 16-byte (4-longword) result entries.
7010: #
7011: ovf_res_load:
7012: mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7013: lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7014:
7015: rts
7016:
7017: # cc table: one byte per {prec,mode} entry; rows follow the result table
7018: # layout below (0x2 = INF bit, 0x8 = NEG bit).
7019: tbl_ovfl_cc:
7020: byte 0x2, 0x0, 0x0, 0x2
7021: byte 0x2, 0x0, 0x0, 0x2
7022: byte 0x2, 0x0, 0x0, 0x2
7023: byte 0x0, 0x0, 0x0, 0x0
7024: byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7025: byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7026: byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7027:
7028: # default results: 16 bytes each; rows of 4 = rnd modes {RN,RZ,RM,RP},
7029: # row groups = rnd prec {ext,sgl,dbl}, then the negative-sign groups.
7030: tbl_ovfl_result:
7031: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7032: long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7033: long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7034: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7035:
7036: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7037: long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7038: long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7039: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7040:
7041: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7042: long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7043: long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7044: long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7045:
7046: long 0x00000000,0x00000000,0x00000000,0x00000000
7047: long 0x00000000,0x00000000,0x00000000,0x00000000
7048: long 0x00000000,0x00000000,0x00000000,0x00000000
7049: long 0x00000000,0x00000000,0x00000000,0x00000000
7050:
7051: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7052: long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7053: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7054: long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7055:
7056: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7057: long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7058: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7059: long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7060:
7061: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7062: long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7063: long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7064: long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7059:
7060: #########################################################################
7061: # XDEF **************************************************************** #
7062: # fout(): move from fp register to memory or data register #
7063: # #
7064: # XREF **************************************************************** #
7065: # _round() - needed to create EXOP for sgl/dbl precision #
7066: # norm() - needed to create EXOP for extended precision #
7067: # ovf_res() - create default overflow result for sgl/dbl precision#
7068: # unf_res() - create default underflow result for sgl/dbl prec. #
7069: # dst_dbl() - create rounded dbl precision result. #
7070: # dst_sgl() - create rounded sgl precision result. #
7071: # fetch_dreg() - fetch dynamic k-factor reg for packed. #
7072: # bindec() - convert FP binary number to packed number. #
7073: # _mem_write() - write data to memory. #
7074: # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7075: # _dmem_write_{byte,word,long}() - write data to memory. #
7076: # store_dreg_{b,w,l}() - store data to data register file. #
7077: # facc_out_{b,w,l,d,x}() - data access error occurred. #
7078: # #
7079: # INPUT *************************************************************** #
7080: # a0 = pointer to extended precision source operand #
7081: # d0 = round prec,mode #
7082: # #
7083: # OUTPUT ************************************************************** #
7084: # fp0 : intermediate underflow or overflow result if #
7085: # OVFL/UNFL occurred for a sgl or dbl operand #
7086: # #
7087: # ALGORITHM *********************************************************** #
7088: # This routine is accessed by many handlers that need to do an #
7089: # opclass three move of an operand out to memory. #
7090: # Decode an fmove out (opclass 3) instruction to determine if #
7091: # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7092: # register or memory. The algorithm uses a standard "fmove" to create #
7093: # the rounded result. Also, since exceptions are disabled, this also #
7094: # create the correct OPERR default result if appropriate. #
7095: # For sgl or dbl precision, overflow or underflow can occur. If #
7096: # either occurs and is enabled, the EXOP. #
7097: # For extended precision, the stacked <ea> must be fixed along #
7098: # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7099: # the source is a denorm and if underflow is enabled, an EXOP must be #
7100: # created. #
7101: # For packed, the k-factor must be fetched from the instruction #
7102: # word or a data register. The <ea> must be fixed as w/ extended #
7103: # precision. Then, bindec() is called to create the appropriate #
7104: # packed result. #
7105: # If at any time an access error is flagged by one of the move- #
7106: # to-memory routines, then a special exit must be made so that the #
7107: # access error can be handled properly. #
7108: # #
7109: #########################################################################
7110:
7111: global fout
7112: fout:
7113: bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract 3-bit dst format field
7114: mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7115: jmp (tbl_fout.b,%pc,%a1) # jump to routine
7116:
7117: swbeg &0x8
7118: # one entry per destination format code (l,s,x,p,w,d,b,p-dynamic)
7119: tbl_fout:
7120: short fout_long - tbl_fout
7121: short fout_sgl - tbl_fout
7122: short fout_ext - tbl_fout
7123: short fout_pack - tbl_fout
7124: short fout_word - tbl_fout
7125: short fout_dbl - tbl_fout
7126: short fout_byte - tbl_fout
7127: short fout_pack - tbl_fout
7127:
7128: #################################################################
7129: # fmove.b out ###################################################
7130: #################################################################
7131:
7132: # Only "Unimplemented Data Type" exceptions enter here. The operand
7133: # is either a DENORM or a NORM.
7134: fout_byte:
7135: tst.b STAG(%a6) # is operand normalized?
7136: bne.b fout_byte_denorm # no
7137:
7138: fmovm.x SRC(%a0),&0x80 # load value
7139:
7140: fout_byte_norm:
7141: fmov.l %d0,%fpcr # insert rnd prec,mode
7142:
7143: fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
7144:
7145: fmov.l &0x0,%fpcr # clear FPCR
7146: fmov.l %fpsr,%d1 # fetch FPSR
7147: or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7148:
7149: mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7150: andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7151: beq.b fout_byte_dn # must save to integer regfile
7152:
7153: mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7154: bsr.l _dmem_write_byte # write byte
7155:
7156: tst.l %d1 # did dstore fail?
7157: bne.l facc_out_b # yes
7158:
7159: rts
7160:
7161: fout_byte_dn:
7162: mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7163: andi.w &0x7,%d1
7164: bsr.l store_dreg_b
7165: rts
7166:
7167: # DENORM source: substitute the smallest-magnitude sgl value with the
7168: # DENORM's sign -- presumably so the fmov.b produces the same integer
7169: # result and exception bits as the true denorm would (verify vs 060UM).
7170: fout_byte_denorm:
7171: mov.l SRC_EX(%a0),%d1
7172: andi.l &0x80000000,%d1 # keep DENORM sign
7173: ori.l &0x00800000,%d1 # make smallest sgl
7174: fmov.s %d1,%fp0
7175: bra.b fout_byte_norm
7173:
7174: #################################################################
7175: # fmove.w out ###################################################
7176: #################################################################
7177: 
7178: # Only "Unimplemented Data Type" exceptions enter here. The operand
7179: # is either a DENORM or a NORM.
# fout_word(): store the FP source operand out as a word, to either a
# data register or memory.  Mirrors fout_byte; a DENORM source is
# replaced by the smallest-magnitude single with the DENORM's sign
# before the normal store path runs.
7180: fout_word:
7181: 	tst.b		STAG(%a6)		# is operand normalized?
7182: 	bne.b		fout_word_denorm	# no
7183: 
7184: 	fmovm.x		SRC(%a0),&0x80		# load value
7185: 
7186: fout_word_norm:
7187: 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7188: 
7189: 	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
7190: 
7191: 	fmov.l		&0x0,%fpcr		# clear FPCR
7192: 	fmov.l		%fpsr,%d1		# fetch FPSR
7193: 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7194: 
7195: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7196: 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7197: 	beq.b		fout_word_dn		# must save to integer regfile
7198: 
7199: 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7200: 	bsr.l		_dmem_write_word	# write word
7201: 
7202: 	tst.l		%d1			# did dstore fail?
7203: 	bne.l		facc_out_w		# yes
7204: 
7205: 	rts
7206: 
7207: fout_word_dn:
7208: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7209: 	andi.w		&0x7,%d1		# keep only register number
7210: 	bsr.l		store_dreg_w		# store word to Dn
7211: 	rts
7212: 
7213: fout_word_denorm:
7214: 	mov.l		SRC_EX(%a0),%d1
7215: 	andi.l		&0x80000000,%d1		# keep DENORM sign
7216: 	ori.l		&0x00800000,%d1		# make smallest sgl
7217: 	fmov.s		%d1,%fp0		# load substitute into fp0
7218: 	bra.b		fout_word_norm		# take the normal store path
7219:
7220: #################################################################
7221: # fmove.l out ###################################################
7222: #################################################################
7223: 
7224: # Only "Unimplemented Data Type" exceptions enter here. The operand
7225: # is either a DENORM or a NORM.
# fout_long(): store the FP source operand out as a longword, to either
# a data register or memory.  Mirrors fout_byte/fout_word; a DENORM
# source is replaced by the smallest-magnitude single with the DENORM's
# sign before the normal store path runs.
7226: fout_long:
7227: 	tst.b		STAG(%a6)		# is operand normalized?
7228: 	bne.b		fout_long_denorm	# no
7229: 
7230: 	fmovm.x		SRC(%a0),&0x80		# load value
7231: 
7232: fout_long_norm:
7233: 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7234: 
7235: 	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
7236: 
7237: 	fmov.l		&0x0,%fpcr		# clear FPCR
7238: 	fmov.l		%fpsr,%d1		# fetch FPSR
7239: 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7240: 
7241: fout_long_write:
7242: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7243: 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7244: 	beq.b		fout_long_dn		# must save to integer regfile
7245: 
7246: 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7247: 	bsr.l		_dmem_write_long	# write long
7248: 
7249: 	tst.l		%d1			# did dstore fail?
7250: 	bne.l		facc_out_l		# yes
7251: 
7252: 	rts
7253: 
7254: fout_long_dn:
7255: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7256: 	andi.w		&0x7,%d1		# keep only register number
7257: 	bsr.l		store_dreg_l		# store longword to Dn
7258: 	rts
7259: 
7260: fout_long_denorm:
7261: 	mov.l		SRC_EX(%a0),%d1
7262: 	andi.l		&0x80000000,%d1		# keep DENORM sign
7263: 	ori.l		&0x00800000,%d1		# make smallest sgl
7264: 	fmov.s		%d1,%fp0		# load substitute into fp0
7265: 	bra.b		fout_long_norm		# take the normal store path
7266:
7267: #################################################################
7268: # fmove.x out ###################################################
7269: #################################################################
7270: 
7271: # Only "Unimplemented Data Type" exceptions enter here. The operand
7272: # is either a DENORM or a NORM.
7273: # The DENORM causes an Underflow exception.
# fout_ext(): write the extended-precision operand out to memory.
# The stacked <ea> (and any address register, for pre/post modes) is
# fixed up via _calc_ea_fout() first.  A DENORM source additionally
# posts Underflow and, if UNFL/INEX are enabled, builds an EXOP in fp1.
7274: fout_ext:
7275: 
7276: # we copy the extended precision result to FP_SCR0 so that the reserved
7277: # 16-bit field gets zeroed. we do this since we promise not to disturb
7278: # what's at SRC(a0).
7279: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7280: 	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
7281: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7282: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7283: 
7284: 	fmovm.x		SRC(%a0),&0x80		# return result
7285: 
7286: 	bsr.l		_calc_ea_fout		# fix stacked <ea>
7287: 
7288: 	mov.l		%a0,%a1			# pass: dst addr
7289: 	lea		FP_SCR0(%a6),%a0	# pass: src addr
7290: 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7291: 
7292: # we must not yet write the extended precision data to the stack
7293: # in the pre-decrement case from supervisor mode or else we'll corrupt
7294: # the stack frame. so, leave it in FP_SRC for now and deal with it later...
7295: 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
7296: 	beq.b		fout_ext_a7
7297: 
7298: 	bsr.l		_dmem_write		# write ext prec number to memory
7299: 
7300: 	tst.l		%d1			# did dstore fail?
7301: 	bne.w		fout_ext_err		# yes
7302: 
7303: 	tst.b		STAG(%a6)		# is operand normalized?
7304: 	bne.b		fout_ext_denorm		# no
7305: 	rts
7306: 
7307: # the number is a DENORM. must set the underflow exception bit
7308: fout_ext_denorm:
7309: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7310: 
7311: 	mov.b		FPCR_ENABLE(%a6),%d0
7312: 	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
7313: 	bne.b		fout_ext_exc		# yes
7314: 	rts
7315: 
7316: # we don't want to do the write if the exception occurred in supervisor mode
7317: # so _mem_write2() handles this for us.
7318: fout_ext_a7:
7319: 	bsr.l		_mem_write2		# write ext prec number to memory
7320: 
7321: 	tst.l		%d1			# did dstore fail?
7322: 	bne.w		fout_ext_err		# yes
7323: 
7324: 	tst.b		STAG(%a6)		# is operand normalized?
7325: 	bne.b		fout_ext_denorm		# no
7326: 	rts
7327: 
# build the EXOP: normalize the DENORM mantissa and fold the (negated)
# shift count into the exponent field, preserving the sign bit.
7328: fout_ext_exc:
7329: 	lea		FP_SCR0(%a6),%a0
7330: 	bsr.l		norm			# normalize the mantissa
7331: 	neg.w		%d0			# new exp = -(shft amt)
7332: 	andi.w		&0x7fff,%d0		# keep 15-bit exponent
7333: 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
7334: 	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7335: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7336: 	rts
7337: 
# a write faulted: restore the frame pointer slot _calc_ea_fout may have
# adjusted, then take the access-error exit.
7338: fout_ext_err:
7339: 	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
7340: 	bra.l		facc_out_x
7341:
7342: #########################################################################
7343: # fmove.s out ###########################################################
7344: #########################################################################
# fout_sgl(): store the operand out in single precision.  The exponent
# is compared against SGL_HI/SGL_LO to route to the overflow, possible-
# overflow, or underflow handlers; in-range values are converted and
# rounded by a plain "fmov.s" with the user's rounding mode forced to
# single precision (saved in L_SCR3 for the exceptional paths).
7345: fout_sgl:
7346: 	andi.b		&0x30,%d0		# clear rnd prec
7347: 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
7348: 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7349: 
7350: #
7351: # operand is a normalized number. first, we check to see if the move out
7352: # would cause either an underflow or overflow. these cases are handled
7353: # separately. otherwise, set the FPCR to the proper rounding mode and
7354: # execute the move.
7355: #
7356: 	mov.w		SRC_EX(%a0),%d0		# extract exponent
7357: 	andi.w		&0x7fff,%d0		# strip sign
7358: 
7359: 	cmpi.w		%d0,&SGL_HI		# will operand overflow?
7360: 	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
7361: 	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
7362: 	cmpi.w		%d0,&SGL_LO		# will operand underflow?
7363: 	blt.w		fout_sgl_unfl		# yes; go handle underflow
7364: 
7365: #
7366: # NORMs(in range) can be stored out by a simple "fmov.s"
7367: # Unnormalized inputs can come through this point.
7368: #
7369: fout_sgl_exg:
7370: 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7371: 
7372: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7373: 	fmov.l		&0x0,%fpsr		# clear FPSR
7374: 
7375: 	fmov.s		%fp0,%d0		# store does convert and round
7376: 
7377: 	fmov.l		&0x0,%fpcr		# clear FPCR
7378: 	fmov.l		%fpsr,%d1		# save FPSR
7379: 
7380: 	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
7381: 
7382: fout_sgl_exg_write:
7383: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7384: 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7385: 	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
7386: 
7387: 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7388: 	bsr.l		_dmem_write_long	# write long
7389: 
7390: 	tst.l		%d1			# did dstore fail?
7391: 	bne.l		facc_out_l		# yes
7392: 
7393: 	rts
7394: 
7395: fout_sgl_exg_write_dn:
7396: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7397: 	andi.w		&0x7,%d1		# keep only register number
7398: 	bsr.l		store_dreg_l		# store single image to Dn
7399: 	rts
7400:
7401: #
7402: # here, we know that the operand would UNFL if moved out to single prec,
7403: # so, denorm and round and then use generic store single routine to
7404: # write the value to memory.
7405: #
# On entry a0 is saved on the stack for the possible EXOP path
# (fout_sd_exc_unfl pops it); otherwise it is discarded before rts.
7406: fout_sgl_unfl:
7407: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7408: 
7409: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7410: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7411: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7412: 	mov.l		%a0,-(%sp)		# save src ptr for EXOP path
7413: 
7414: 	clr.l		%d0			# pass: S.F. = 0
7415: 
7416: 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7417: 	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
7418: 
7419: 	lea		FP_SCR0(%a6),%a0
7420: 	bsr.l		norm			# normalize the DENORM
7421: 
7422: fout_sgl_unfl_cont:
7423: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7424: 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7425: 	bsr.l		unf_res			# calc default underflow result
7426: 
7427: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7428: 	bsr.l		dst_sgl			# convert to single prec
7429: 
7430: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7431: 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7432: 	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
7433: 
7434: 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7435: 	bsr.l		_dmem_write_long	# write long
7436: 
7437: 	tst.l		%d1			# did dstore fail?
7438: 	bne.l		facc_out_l		# yes
7439: 
7440: 	bra.b		fout_sgl_unfl_chkexc
7441: 
7442: fout_sgl_unfl_dn:
7443: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7444: 	andi.w		&0x7,%d1		# keep only register number
7445: 	bsr.l		store_dreg_l		# store single image to Dn
7446: 
7447: fout_sgl_unfl_chkexc:
7448: 	mov.b		FPCR_ENABLE(%a6),%d1
7449: 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7450: 	bne.w		fout_sd_exc_unfl	# yes; build EXOP
7451: 	addq.l		&0x4,%sp		# discard saved src ptr
7452: 	rts
7453:
7454: #
7455: # it's definitely an overflow so call ovf_res to get the correct answer
7456: #
# fout_sgl_ovfl(): store path for a single-precision move-out that
# surely overflows.  Posts ovfl/aovfl/ainex (plus inex2 if any bits
# would be lost), stores the default overflow result from ovf_res(),
# then builds an EXOP via fout_sd_exc_ovfl if OVFL or INEX is enabled.
# The src ptr (a0) is stacked for that EXOP path.
7457: fout_sgl_ovfl:
7458: 	tst.b		3+SRC_HI(%a0)		# is result inexact?
7459: 	bne.b		fout_sgl_ovfl_inex2
7460: 	tst.l		SRC_LO(%a0)		# is result inexact?
7461: 	bne.b		fout_sgl_ovfl_inex2
7462: 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7463: 	bra.b		fout_sgl_ovfl_cont
7464: fout_sgl_ovfl_inex2:
7465: 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7466: 
7467: fout_sgl_ovfl_cont:
7468: 	mov.l		%a0,-(%sp)		# save src ptr for EXOP path
7469: 
7470: # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7471: # overflow result. DON'T save the returned ccodes from ovf_res() since
7472: # fmove out doesn't alter them.
7473: 	tst.b		SRC_EX(%a0)		# is operand negative?
7474: 	smi		%d1			# set if so
7475: 	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
7476: 	bsr.l		ovf_res			# calc OVFL result
7477: 	fmovm.x		(%a0),&0x80		# load default overflow result
7478: 	fmov.s		%fp0,%d0		# store to single
7479: 
7480: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7481: 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7482: 	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
7483: 
7484: 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7485: 	bsr.l		_dmem_write_long	# write long
7486: 
7487: 	tst.l		%d1			# did dstore fail?
7488: 	bne.l		facc_out_l		# yes
7489: 
7490: 	bra.b		fout_sgl_ovfl_chkexc
7491: 
7492: fout_sgl_ovfl_dn:
7493: 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7494: 	andi.w		&0x7,%d1		# keep only register number
7495: 	bsr.l		store_dreg_l		# store single image to Dn
7496: 
# this is the OVERFLOW path, so test the OVFL (0x10) and INEX2/INEX1
# (0x03) enable bits -- not the UNFL mask (0x0a) used on the underflow
# path above.
7497: fout_sgl_ovfl_chkexc:
7498: 	mov.b		FPCR_ENABLE(%a6),%d1
7499: 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
7500: 	bne.w		fout_sd_exc_ovfl	# yes; build EXOP
7501: 	addq.l		&0x4,%sp		# discard saved src ptr
7502: 	rts
7503:
7504: #
7505: # move out MAY overflow:
7506: # (1) force the exp to 0x3fff
7507: # (2) do a move w/ appropriate rnd mode
7508: # (3) if exp still equals zero, then insert original exponent
7509: # for the correct result.
7510: # if exp now equals one, then it overflowed so call ovf_res.
7511: #
# Rounding a mantissa of all ones can carry out and bump the exponent;
# scale exp to 0x3fff (bias, i.e. value 1.0 <= |x| < 2.0), round, and
# see whether |result| reached 2.0 to decide NORM vs. overflow.
7512: fout_sgl_may_ovfl:
7513: 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7514: 	andi.w		&0x8000,%d1		# keep it,clear exp
7515: 	ori.w		&0x3fff,%d1		# insert exp = 0
7516: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7517: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7518: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7519: 
7520: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7521: 
7522: 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7523: 	fmov.l		&0x0,%fpcr		# clear FPCR
7524: 
7525: 	fabs.x		%fp0			# need absolute value
7526: 	fcmp.b		%fp0,&0x2		# did exponent increase?
7527: 	fblt.w		fout_sgl_exg		# no; go finish NORM
7528: 	bra.w		fout_sgl_ovfl		# yes; go handle overflow
7529:
7530: ################
7531: 
# fout_sd_exc_unfl()/fout_sd_exc_ovfl(): build the EXOP in fp1 for an
# enabled underflow/overflow on a single- or double-precision fmove
# out.  Both entries pop the src operand ptr saved by their caller,
# copy the operand to FP_SCR0, and fall into the common rounding code.
7532: fout_sd_exc_unfl:
7533: 	mov.l		(%sp)+,%a0		# restore src ptr
7534: 
7535: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7536: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7537: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7538: 
7539: 	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
7540: 	bne.b		fout_sd_exc_cont	# no
7541: 
# DENORM source: normalize it and write the exponent that accounts for
# the normalization shift into bits {1:15} (sign bit untouched).
7542: 	lea		FP_SCR0(%a6),%a0
7543: 	bsr.l		norm			# normalize the mantissa
7544: 	neg.l		%d0			# new exp = -(shft amt)
7545: 	andi.w		&0x7fff,%d0		# keep 15-bit exponent
7546: 	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert new exponent
7547: 	bra.b		fout_sd_exc_cont
7548: 
7549: fout_sd_exc:
7550: fout_sd_exc_ovfl:
7551: 	mov.l		(%sp)+,%a0		# restore a0
7552: 
7553: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7554: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7555: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7556: 
# common: round FP_SCR0 to the precision/mode saved in L_SCR3 via
# _round(), using the internal-sign-byte convention _round expects.
7557: fout_sd_exc_cont:
7558: 	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
7559: 	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
7560: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
7561: 
7562: 	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
7563: 	lsr.b		&0x4,%d1		# shift prec/mode to lo bits
7564: 	andi.w		&0x0c,%d1		# keep rnd prec
7565: 	swap		%d1			# prec in hi word of d1
7566: 	mov.b		3+L_SCR3(%a6),%d1	# refetch rnd prec,mode byte
7567: 	lsr.b		&0x4,%d1		# shift prec/mode to lo bits
7568: 	andi.w		&0x03,%d1		# keep rnd mode in lo word
7569: 	clr.l		%d0			# pass: zero g,r,s
7570: 	bsr.l		_round			# round the DENORM
7571: 
7572: 	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
7573: 	beq.b		fout_sd_exc_done	# no
7574: 	bset		&0x7,FP_SCR0_EX(%a6)	# yes; restore sign bit
7575: 
7576: fout_sd_exc_done:
7577: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7578: 	rts
7579:
7580: #################################################################
7581: # fmove.d out ###################################################
7582: #################################################################
# fout_dbl(): store the operand out in double precision.  Same routing
# as fout_sgl (overflow / may-overflow / underflow / in-range) using
# the DBL_HI/DBL_LO thresholds; the 8-byte result image is staged in
# L_SCR1/L_SCR2 and written with _dmem_write.
7583: fout_dbl:
7584: 	andi.b		&0x30,%d0		# clear rnd prec
7585: 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
7586: 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7587: 
7588: #
7589: # operand is a normalized number. first, we check to see if the move out
7590: # would cause either an underflow or overflow. these cases are handled
7591: # separately. otherwise, set the FPCR to the proper rounding mode and
7592: # execute the move.
7593: #
7594: 	mov.w		SRC_EX(%a0),%d0		# extract exponent
7595: 	andi.w		&0x7fff,%d0		# strip sign
7596: 
7597: 	cmpi.w		%d0,&DBL_HI		# will operand overflow?
7598: 	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
7599: 	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
7600: 	cmpi.w		%d0,&DBL_LO		# will operand underflow?
7601: 	blt.w		fout_dbl_unfl		# yes; go handle underflow
7602: 
7603: #
7604: # NORMs(in range) can be stored out by a simple "fmov.d"
7605: # Unnormalized inputs can come through this point.
7606: #
7607: fout_dbl_exg:
7608: 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7609: 
7610: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7611: 	fmov.l		&0x0,%fpsr		# clear FPSR
7612: 
7613: 	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
7614: 
7615: 	fmov.l		&0x0,%fpcr		# clear FPCR
7616: 	fmov.l		%fpsr,%d0		# save FPSR
7617: 
7618: 	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
7619: 
7620: 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7621: 	lea		L_SCR1(%a6),%a0		# pass: src addr
7622: 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7623: 	bsr.l		_dmem_write		# store dbl fop to memory
7624: 
7625: 	tst.l		%d1			# did dstore fail?
7626: 	bne.l		facc_out_d		# yes
7627: 
7628: 	rts				# no; so we're finished
7629:
7630: #
7631: # here, we know that the operand would UNFL if moved out to double prec,
7632: # so, denorm and round and then use generic store double routine to
7633: # write the value to memory.
7634: #
# On entry a0 is saved on the stack for the possible EXOP path
# (fout_sd_exc_unfl pops it); otherwise it is discarded before rts.
7635: fout_dbl_unfl:
7636: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7637: 
7638: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7639: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7640: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7641: 	mov.l		%a0,-(%sp)		# save src ptr for EXOP path
7642: 
7643: 	clr.l		%d0			# pass: S.F. = 0
7644: 
7645: 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7646: 	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
7647: 
7648: 	lea		FP_SCR0(%a6),%a0
7649: 	bsr.l		norm			# normalize the DENORM
7650: 
7651: fout_dbl_unfl_cont:
7652: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7653: 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7654: 	bsr.l		unf_res			# calc default underflow result
7655: 
7656: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7657: 	bsr.l		dst_dbl			# convert to double prec
7658: 	mov.l		%d0,L_SCR1(%a6)		# stage hi(double) in memory
7659: 	mov.l		%d1,L_SCR2(%a6)		# stage lo(double) in memory
7660: 
7661: 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7662: 	lea		L_SCR1(%a6),%a0		# pass: src addr
7663: 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7664: 	bsr.l		_dmem_write		# store dbl fop to memory
7665: 
7666: 	tst.l		%d1			# did dstore fail?
7667: 	bne.l		facc_out_d		# yes
7668: 
7669: 	mov.b		FPCR_ENABLE(%a6),%d1
7670: 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7671: 	bne.w		fout_sd_exc_unfl	# yes; build EXOP
7672: 	addq.l		&0x4,%sp		# discard saved src ptr
7673: 	rts
7674:
7675: #
7676: # it's definitely an overflow so call ovf_res to get the correct answer
7677: #
# fout_dbl_ovfl(): store path for a double-precision move-out that
# surely overflows.  Posts ovfl/aovfl/ainex (plus inex2 if mantissa
# bits below the double would be lost), stores the default overflow
# result from ovf_res(), then builds an EXOP via fout_sd_exc_ovfl if
# OVFL or INEX is enabled.  The src ptr (a0) is stacked for that path.
7678: fout_dbl_ovfl:
7679: 	mov.w		2+SRC_LO(%a0),%d0
7680: 	andi.w		&0x7ff,%d0		# any bits below dbl mantissa?
7681: 	bne.b		fout_dbl_ovfl_inex2
7682: 
7683: 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7684: 	bra.b		fout_dbl_ovfl_cont
7685: fout_dbl_ovfl_inex2:
7686: 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7687: 
7688: fout_dbl_ovfl_cont:
7689: 	mov.l		%a0,-(%sp)		# save src ptr for EXOP path
7690: 
7691: # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7692: # overflow result. DON'T save the returned ccodes from ovf_res() since
7693: # fmove out doesn't alter them.
7694: 	tst.b		SRC_EX(%a0)		# is operand negative?
7695: 	smi		%d1			# set if so
7696: 	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
7697: 	bsr.l		ovf_res			# calc OVFL result
7698: 	fmovm.x		(%a0),&0x80		# load default overflow result
7699: 	fmov.d		%fp0,L_SCR1(%a6)	# store to double
7700: 
7701: 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7702: 	lea		L_SCR1(%a6),%a0		# pass: src addr
7703: 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7704: 	bsr.l		_dmem_write		# store dbl fop to memory
7705: 
7706: 	tst.l		%d1			# did dstore fail?
7707: 	bne.l		facc_out_d		# yes
7708: 
# this is the OVERFLOW path, so test the OVFL (0x10) and INEX2/INEX1
# (0x03) enable bits -- not the UNFL mask (0x0a) used by fout_dbl_unfl.
7709: 	mov.b		FPCR_ENABLE(%a6),%d1
7710: 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
7711: 	bne.w		fout_sd_exc_ovfl	# yes; build EXOP
7712: 	addq.l		&0x4,%sp		# discard saved src ptr
7713: 	rts
7714:
7715: #
7716: # move out MAY overflow:
7717: # (1) force the exp to 0x3fff
7718: # (2) do a move w/ appropriate rnd mode
7719: # (3) if exp still equals zero, then insert original exponent
7720: # for the correct result.
7721: # if exp now equals one, then it overflowed so call ovf_res.
7722: #
# Rounding a mantissa of all ones can carry out and bump the exponent;
# scale exp to 0x3fff (bias, i.e. value 1.0 <= |x| < 2.0), round, and
# see whether |result| reached 2.0 to decide NORM vs. overflow.
7723: fout_dbl_may_ovfl:
7724: 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7725: 	andi.w		&0x8000,%d1		# keep it,clear exp
7726: 	ori.w		&0x3fff,%d1		# insert exp = 0
7727: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7728: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7729: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7730: 
7731: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7732: 
7733: 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7734: 	fmov.l		&0x0,%fpcr		# clear FPCR
7735: 
7736: 	fabs.x		%fp0			# need absolute value
7737: 	fcmp.b		%fp0,&0x2		# did exponent increase?
7738: 	fblt.w		fout_dbl_exg		# no; go finish NORM
7739: 	bra.w		fout_dbl_ovfl		# yes; go handle overflow
7740:
7741: #########################################################################
7742: # XDEF **************************************************************** #
7743: # dst_dbl(): create double precision value from extended prec. #
7744: # #
7745: # XREF **************************************************************** #
7746: # None #
7747: # #
7748: # INPUT *************************************************************** #
7749: # a0 = pointer to source operand in extended precision #
7750: # #
7751: # OUTPUT ************************************************************** #
7752: # d0 = hi(double precision result) #
7753: # d1 = lo(double precision result) #
7754: # #
7755: # ALGORITHM *********************************************************** #
7756: # #
7757: # Changes extended precision to double precision. #
7758: # Note: no attempt is made to round the extended value to double. #
7759: # dbl_sign = ext_sign #
7760: # dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
7761: # get rid of ext integer bit #
7762: # dbl_mant = ext_mant{62:12} #
7763: # #
7764: # --------------- --------------- --------------- #
7765: # extended -> |s| exp | |1| ms mant | | ls mant | #
7766: # --------------- --------------- --------------- #
7767: # 95 64 63 62 32 31 11 0 #
7768: # | | #
7769: # | | #
7770: # | | #
7771: # v v #
7772: # --------------- --------------- #
7773: # double -> |s|exp| mant | | mant | #
7774: # --------------- --------------- #
7775: # 63 51 32 31 0 #
7776: # #
7777: #########################################################################
7778:
# dst_dbl(): repack the extended value at (a0) as a double-precision
# bit image; hi lword returned in d0, lo lword in d1 (no rounding --
# see the header box above).  Uses L_SCR1/L_SCR2 as scratch.
7779: dst_dbl:
7780: 	clr.l		%d0			# clear d0
7781: 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7782: 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7783: 	addi.w		&DBL_BIAS,%d0		# add double precision bias
7784: 	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7785: 	bmi.b		dst_get_dupper		# no
7786: 	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
7787: dst_get_dupper:
7788: 	swap		%d0			# d0 now in upper word
7789: 	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
7790: 	tst.b		FTEMP_EX(%a0)		# test sign
7791: 	bpl.b		dst_get_dman		# if positive, go process mantissa
7792: 	bset		&0x1f,%d0		# if negative, set sign
7793: dst_get_dman:
7794: 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7795: 	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
7796: 	or.l		%d1,%d0			# put these bits in ms word of double
7797: 	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
7798: 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7799: 	mov.l		&21,%d0			# load shift count
7800: 	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
7801: 	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
7802: 	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
7803: 	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
7804: 	mov.l		L_SCR2(%a6),%d1
7805: 	or.l		%d0,%d1			# put them in double result
7806: 	mov.l		L_SCR1(%a6),%d0		# return hi lword in d0
7807: 	rts
7808:
7809: #########################################################################
7810: # XDEF **************************************************************** #
7811: # dst_sgl(): create single precision value from extended prec #
7812: # #
7813: # XREF **************************************************************** #
7814: # #
7815: # INPUT *************************************************************** #
7816: # a0 = pointer to source operand in extended precision #
7817: # #
7818: # OUTPUT ************************************************************** #
7819: # d0 = single precision result #
7820: # #
7821: # ALGORITHM *********************************************************** #
7822: # #
7823: # Changes extended precision to single precision. #
7824: # sgl_sign = ext_sign #
7825: # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7826: # get rid of ext integer bit #
7827: # sgl_mant = ext_mant{62:12} #
7828: # #
7829: # --------------- --------------- --------------- #
7830: # extended -> |s| exp | |1| ms mant | | ls mant | #
7831: # --------------- --------------- --------------- #
7832: # 95 64 63 62 40 32 31 12 0 #
7833: # | | #
7834: # | | #
7835: # | | #
7836: # v v #
7837: # --------------- #
7838: # single -> |s|exp| mant | #
7839: # --------------- #
7840: # 31 22 0 #
7841: # #
7842: #########################################################################
7843:
# dst_sgl(): repack the extended value at (a0) as a single-precision
# bit image returned in d0 (no rounding -- see the header box above).
7844: dst_sgl:
7845: 	clr.l		%d0			# clear result accumulator
7846: 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7847: 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7848: 	addi.w		&SGL_BIAS,%d0		# add single precision bias
7849: 	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7850: 	bmi.b		dst_get_supper		# no
7851: 	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
7852: dst_get_supper:
7853: 	swap		%d0			# put exp in upper word of d0
7854: 	lsl.l		&0x7,%d0		# shift it into single exp bits
7855: 	tst.b		FTEMP_EX(%a0)		# test sign
7856: 	bpl.b		dst_get_sman		# if positive, continue
7857: 	bset		&0x1f,%d0		# if negative, put in sign first
7858: dst_get_sman:
7859: 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7860: 	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
7861: 	lsr.l		&0x8,%d1		# and put them flush right
7862: 	or.l		%d1,%d0			# put these bits in ms word of single
7863: 	rts
7864:
7865: ##############################################################################
# fout_pack(): store the operand out in packed-decimal format.  Fixes
# the stacked <ea> with _calc_ea_fout(), fetches the k-factor (static
# from the instruction word or dynamic from Dn), converts via bindec(),
# and writes the 12-byte result.  Non-NORM inputs (except DENORMs,
# which take the normal path) are written out directly; SNANs get the
# snan bit set and SNAN/AIOP posted first.
7866: fout_pack:
7867: 	bsr.l		_calc_ea_fout		# fetch the <ea>
7868: 	mov.l		%a0,-(%sp)		# save dst addr for the write
7869: 
7870: 	mov.b		STAG(%a6),%d0		# fetch input type
7871: 	bne.w		fout_pack_not_norm	# input is not NORM
7872: 
7873: fout_pack_norm:
7874: 	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
7875: 	beq.b		fout_pack_s		# static
7876: 
7877: fout_pack_d:
7878: 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
7879: 	lsr.b		&0x4,%d1		# shift reg field down
7880: 	andi.w		&0x7,%d1		# keep only register number
7881: 
7882: 	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
7883: 
7884: 	bra.b		fout_pack_type
7885: fout_pack_s:
7886: 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
7887: 
7888: fout_pack_type:
7889: 	bfexts		%d0{&25:&7},%d0		# extract k-factor
7890: 	mov.l		%d0,-(%sp)		# save k-factor across bindec
7891: 
7892: 	lea		FP_SRC(%a6),%a0		# pass: ptr to input
7893: 
7894: # bindec is currently scrambling FP_SRC for denorm inputs.
7895: # we'll have to change this, but for now, tough luck!!!
7896: 	bsr.l		bindec			# convert xprec to packed
7897: 
7898: #	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
7899: 	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
7900: 
7901: 	mov.l		(%sp)+,%d0		# restore k-factor
7902: 
# is the packed mantissa (ms digit + both mantissa lwords) all zero?
7903: 	tst.b		3+FP_SCR0_EX(%a6)
7904: 	bne.b		fout_pack_set
7905: 	tst.l		FP_SCR0_HI(%a6)
7906: 	bne.b		fout_pack_set
7907: 	tst.l		FP_SCR0_LO(%a6)
7908: 	bne.b		fout_pack_set
7909: 
7910: # add the extra condition that only if the k-factor was zero, too, should
7911: # we zero the exponent
7912: 	tst.l		%d0
7913: 	bne.b		fout_pack_set
7914: # "mantissa" is all zero which means that the answer is zero. but, the '040
7915: # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
7916: # if the mantissa is zero, I will zero the exponent, too.
7917: # the question now is whether the exponents sign bit is allowed to be non-zero
7918: # for a zero, also...
7919: 	andi.w		&0xf000,FP_SCR0(%a6)	# keep sign bits, zero exponent
7920: 
7921: fout_pack_set:
7922: 
7923: 	lea		FP_SCR0(%a6),%a0	# pass: src addr
7924: 
7925: fout_pack_write:
7926: 	mov.l		(%sp)+,%a1		# pass: dst addr
7927: 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7928: 
7929: 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # -(a7) from supervisor mode?
7930: 	beq.b		fout_pack_a7
7931: 
7932: 	bsr.l		_dmem_write		# write ext prec number to memory
7933: 
7934: 	tst.l		%d1			# did dstore fail?
7935: 	bne.w		fout_ext_err		# yes
7936: 
7937: 	rts
7938: 
7939: # we don't want to do the write if the exception occurred in supervisor mode
7940: # so _mem_write2() handles this for us.
7941: fout_pack_a7:
7942: 	bsr.l		_mem_write2		# write ext prec number to memory
7943: 
7944: 	tst.l		%d1			# did dstore fail?
7945: 	bne.w		fout_ext_err		# yes
7946: 
7947: 	rts
7948: 
7949: fout_pack_not_norm:
7950: 	cmpi.b		%d0,&DENORM		# is it a DENORM?
7951: 	beq.w		fout_pack_norm		# yes
7952: 	lea		FP_SRC(%a6),%a0		# pass: write input directly
7953: 	clr.w		2+FP_SRC_EX(%a6)	# clear reserved field
7954: 	cmpi.b		%d0,&SNAN		# is it an SNAN?
7955: 	beq.b		fout_pack_snan		# yes
7956: 	bra.b		fout_pack_write		# no
7957: 
7958: fout_pack_snan:
7959: 	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7960: 	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
7961: 	bra.b		fout_pack_write
7962:
7963: #########################################################################
7964: # XDEF **************************************************************** #
7965: # fmul(): emulates the fmul instruction #
7966: # fsmul(): emulates the fsmul instruction #
7967: # fdmul(): emulates the fdmul instruction #
7968: # #
7969: # XREF **************************************************************** #
7970: # scale_to_zero_src() - scale src exponent to zero #
7971: # scale_to_zero_dst() - scale dst exponent to zero #
7972: # unf_res() - return default underflow result #
7973: # ovf_res() - return default overflow result #
7974: # res_qnan() - return QNAN result #
7975: # res_snan() - return SNAN result #
7976: # #
7977: # INPUT *************************************************************** #
7978: # a0 = pointer to extended precision source operand #
7979: # a1 = pointer to extended precision destination operand #
7980: # d0 rnd prec,mode #
7981: # #
7982: # OUTPUT ************************************************************** #
7983: # fp0 = result #
7984: # fp1 = EXOP (if exception occurred) #
7985: # #
7986: # ALGORITHM *********************************************************** #
7987: # Handle NANs, infinities, and zeroes as special cases. Divide #
7988: # norms/denorms into ext/sgl/dbl precision. #
7989: # For norms/denorms, scale the exponents such that a multiply #
7990: # instruction won't cause an exception. Use the regular fmul to #
7991: # compute a result. Check if the regular operands would have taken #
7992: # an exception. If so, return the default overflow/underflow result #
7993: # and return the EXOP if exceptions are enabled. Else, scale the #
7994: # result operand to the proper exponent. #
7995: # #
7996: #########################################################################
7997:
# Scale-factor thresholds used by fmul below, indexed by rounding
# precision (ext/sgl/dbl): a combined scale factor at/beyond these
# values means the product overflows/underflows for that precision.
7998: 	align		0x10
7999: tbl_fmul_ovfl:
8000: 	long		0x3fff - 0x7ffe		# ext_max
8001: 	long		0x3fff - 0x407e		# sgl_max
8002: 	long		0x3fff - 0x43fe		# dbl_max
8003: tbl_fmul_unfl:
8004: 	long		0x3fff + 0x0001		# ext_unfl
8005: 	long		0x3fff - 0x3f80		# sgl_unfl
8006: 	long		0x3fff - 0x3c00		# dbl_unfl
8007: 
# fsmul(): fmul with the rounding precision forced to single.
8008: 	global		fsmul
8009: fsmul:
8010: 	andi.b		&0x30,%d0		# clear rnd prec
8011: 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8012: 	bra.b		fmul			# common multiply code
8013:
8014: global fdmul
8015: fdmul:
8016: andi.b &0x30,%d0
8017: ori.b &d_mode*0x10,%d0 # insert dbl prec
8018:
8019: global fmul
8020: fmul:
8021: mov.l %d0,L_SCR3(%a6) # store rnd info
8022:
8023: clr.w %d1
8024: mov.b DTAG(%a6),%d1
8025: lsl.b &0x3,%d1
8026: or.b STAG(%a6),%d1 # combine src tags
8027: bne.w fmul_not_norm # optimize on non-norm input
8028:
8029: fmul_norm:
8030: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8031: mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8032: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8033:
8034: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8035: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8036: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8037:
8038: bsr.l scale_to_zero_src # scale src exponent
8039: mov.l %d0,-(%sp) # save scale factor 1
8040:
8041: bsr.l scale_to_zero_dst # scale dst exponent
8042:
8043: add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8044:
8045: mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8046: lsr.b &0x6,%d1 # shift to lo bits
8047: mov.l (%sp)+,%d0 # load S.F.
8048: cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8049: beq.w fmul_may_ovfl # result may rnd to overflow
8050: blt.w fmul_ovfl # result will overflow
8051:
8052: cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8053: beq.w fmul_may_unfl # result may rnd to no unfl
8054: bgt.w fmul_unfl # result will underflow
8055:
8056: #
8057: # NORMAL:
8058: # - the result of the multiply operation will neither overflow nor underflow.
8059: # - do the multiply to the proper precision and rounding mode.
8060: # - scale the result exponent using the scale factor. if both operands were
8061: # normalized then we really don't need to go through this scaling. but for now,
8062: # this will do.
8063: #
8064: fmul_normal:
8065: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8066:
8067: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8068: 	fmov.l		&0x0,%fpsr		# clear FPSR
8069:
8070: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8071:
8072: 	fmov.l		%fpsr,%d1		# save status
8073: 	fmov.l		&0x0,%fpcr		# clear FPCR
8074:
8075: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8076:
8077: # un-scale the result exponent by the saved scale factor (still in d0)
8078: fmul_normal_exit:
8079: 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8080: 	mov.l		%d2,-(%sp)		# save d2
8081: 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8082: 	mov.l		%d1,%d2			# make a copy
8083: 	andi.l		&0x7fff,%d1		# strip sign
8084: 	andi.w		&0x8000,%d2		# keep old sign
8085: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8086: 	or.w		%d2,%d1			# concat old sign,new exp
8087: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8088: 	mov.l		(%sp)+,%d2		# restore d2
8089: 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8090: 	rts
8090:
8091: #
8092: # OVERFLOW:
8093: # - the result of the multiply operation is an overflow.
8094: # - do the multiply to the proper precision and rounding mode in order to
8095: # set the inexact bits.
8096: # - calculate the default result and return it in fp0.
8097: # - if overflow or inexact is enabled, we need a multiply result rounded to
8098: # extended precision. if the original operation was extended, then we have this
8099: # result. if the original operation was single or double, we have to do another
8100: # multiply using extended precision and the correct rounding mode. the result
8101: # of this operation then has its exponent scaled by -0x6000 to create the
8102: # exceptional operand.
8103: #
8104: fmul_ovfl:
8105: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8106:
8107: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8108: 	fmov.l		&0x0,%fpsr		# clear FPSR
8109:
8110: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8111:
8112: 	fmov.l		%fpsr,%d1		# save status
8113: 	fmov.l		&0x0,%fpcr		# clear FPCR
8114:
8115: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8116:
8117: # save setting this until now because this is where fmul_may_ovfl may jump in
8118: fmul_ovfl_tst:
8119: 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8120:
8121: 	mov.b		FPCR_ENABLE(%a6),%d1
8122: 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8123: 	bne.b		fmul_ovfl_ena		# yes
8124:
8125: # calculate the default result
8126: fmul_ovfl_dis:
8127: 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8128: 	sne		%d1			# set sign param accordingly
8129: 	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
8130: 	bsr.l		ovf_res			# calculate default result
8131: 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8132: 	fmovm.x		(%a0),&0x80		# return default result in fp0
8133: 	rts
8134:
8135: #
8136: # OVFL is enabled; Create EXOP:
8137: # - if precision is extended, then we have the EXOP. simply bias the exponent
8138: # with an extra -0x6000. if the precision is single or double, we need to
8139: # calculate a result rounded to extended precision.
8140: #
8141: fmul_ovfl_ena:
8142: 	mov.l		L_SCR3(%a6),%d1
8143: 	andi.b		&0xc0,%d1		# test the rnd prec
8144: 	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
8145:
8146: fmul_ovfl_ena_cont:
8147: 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8148:
8149: 	mov.l		%d2,-(%sp)		# save d2
8150: 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8151: 	mov.w		%d1,%d2			# make a copy
8152: 	andi.l		&0x7fff,%d1		# strip sign
8153: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8154: 	subi.l		&0x6000,%d1		# subtract bias
8155: 	andi.w		&0x7fff,%d1		# clear sign bit
8156: 	andi.w		&0x8000,%d2		# keep old sign
8157: 	or.w		%d2,%d1			# concat old sign,new exp
8158: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8159: 	mov.l		(%sp)+,%d2		# restore d2
8160: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8161: 	bra.b		fmul_ovfl_dis
8162:
8163: # sgl/dbl precision: redo the multiply in extended prec (rnd mode only)
8164: fmul_ovfl_ena_sd:
8165: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8166:
8167: 	mov.l		L_SCR3(%a6),%d1
8168: 	andi.b		&0x30,%d1		# keep rnd mode only
8169: 	fmov.l		%d1,%fpcr		# set FPCR
8170:
8171: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8172:
8173: 	fmov.l		&0x0,%fpcr		# clear FPCR
8174: 	bra.b		fmul_ovfl_ena_cont
8174:
8175: #
8176: # may OVERFLOW:
8177: # - the result of the multiply operation MAY overflow.
8178: # - do the multiply to the proper precision and rounding mode in order to
8179: # set the inexact bits.
8180: # - calculate the default result and return it in fp0.
8181: #
8182: fmul_may_ovfl:
8183: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8184:
8185: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8186: 	fmov.l		&0x0,%fpsr		# clear FPSR
8187:
8188: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8189:
8190: 	fmov.l		%fpsr,%d1		# save status
8191: 	fmov.l		&0x0,%fpcr		# clear FPCR
8192:
8193: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8194:
8195: 	fabs.x		%fp0,%fp1		# make a copy of result
8196: 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8197: 	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
8198:
8199: # no, it didn't overflow; we have correct result
8200: 	bra.w		fmul_normal_exit
8201:
8202: #
8203: # UNDERFLOW:
8204: # - the result of the multiply operation is an underflow.
8205: # - do the multiply to the proper precision and rounding mode in order to
8206: # set the inexact bits.
8207: # - calculate the default result and return it in fp0.
8208: # - if overflow or inexact is enabled, we need a multiply result rounded to
8209: # extended precision. if the original operation was extended, then we have this
8210: # result. if the original operation was single or double, we have to do another
8211: # multiply using extended precision and the correct rounding mode. the result
8212: # of this operation then has its exponent scaled by -0x6000 to create the
8213: # exceptional operand.
8214: #
8215: fmul_unfl:
8216: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8217:
8218: # for fun, let's use only extended precision, round to zero. then, let
8219: # the unf_res() routine figure out all the rest.
8220: # this way we will get the correct answer.
8221: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8222:
8223: 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
8224: 	fmov.l		&0x0,%fpsr		# clear FPSR
8225:
8226: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8227:
8228: 	fmov.l		%fpsr,%d1		# save status
8229: 	fmov.l		&0x0,%fpcr		# clear FPCR
8230:
8231: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8232:
8233: 	mov.b		FPCR_ENABLE(%a6),%d1
8234: 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8235: 	bne.b		fmul_unfl_ena		# yes
8236:
8237: fmul_unfl_dis:
8238: 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8239:
8240: 	lea		FP_SCR0(%a6),%a0	# pass: result addr
8241: 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8242: 	bsr.l		unf_res			# calculate default result
8243: 	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
8244: 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8245: 	rts
8246:
8247: #
8248: # UNFL is enabled.
8249: #
8250: fmul_unfl_ena:
8251: 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
8252:
8253: 	mov.l		L_SCR3(%a6),%d1
8254: 	andi.b		&0xc0,%d1		# is precision extended?
8255: 	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
8256:
8257: # if the rnd mode is anything but RZ, then we have to re-do the above
8258: # multiplication because we used RZ for all.
8259: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8260:
8261: fmul_unfl_ena_cont:
8262: 	fmov.l		&0x0,%fpsr		# clear FPSR
8263:
8264: 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8265:
8266: 	fmov.l		&0x0,%fpcr		# clear FPCR
8267:
8268: 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
8269: 	mov.l		%d2,-(%sp)		# save d2
8270: 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8271: 	mov.l		%d1,%d2			# make a copy
8272: 	andi.l		&0x7fff,%d1		# strip sign
8273: 	andi.w		&0x8000,%d2		# keep old sign
8274: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8275: 	addi.l		&0x6000,%d1		# add bias
8276: 	andi.w		&0x7fff,%d1
8277: 	or.w		%d2,%d1			# concat old sign,new exp
8278: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8279: 	mov.l		(%sp)+,%d2		# restore d2
8280: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8281: 	bra.w		fmul_unfl_dis
8282:
8283: # sgl/dbl precision: EXOP multiply uses extended prec w/ user's rnd mode
8284: fmul_unfl_ena_sd:
8285: 	mov.l		L_SCR3(%a6),%d1
8286: 	andi.b		&0x30,%d1		# use only rnd mode
8287: 	fmov.l		%d1,%fpcr		# set FPCR
8288:
8289: 	bra.b		fmul_unfl_ena_cont
8289:
8290: # MAY UNDERFLOW:
8291: # -use the correct rounding mode and precision. this code favors operations
8292: # that do not underflow.
8293: fmul_may_unfl:
8294: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8295:
8296: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8297: 	fmov.l		&0x0,%fpsr		# clear FPSR
8298:
8299: 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8300:
8301: 	fmov.l		%fpsr,%d1		# save status
8302: 	fmov.l		&0x0,%fpcr		# clear FPCR
8303:
8304: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8305:
8306: 	fabs.x		%fp0,%fp1		# make a copy of result
8307: 	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
8308: 	fbgt.w		fmul_normal_exit	# no; no underflow occurred
8309: 	fblt.w		fmul_unfl		# yes; underflow occurred
8310:
8311: #
8312: # we still don't know if underflow occurred. result is ~ equal to 2. but,
8313: # we don't know if the result was an underflow that rounded up to a 2 or
8314: # a normalized number that rounded down to a 2. so, redo the entire operation
8315: # using RZ as the rounding mode to see what the pre-rounded result is.
8316: # this case should be relatively rare.
8317: #
8318: 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand
8319:
8320: 	mov.l		L_SCR3(%a6),%d1
8321: 	andi.b		&0xc0,%d1		# keep rnd prec
8322: 	ori.b		&rz_mode*0x10,%d1	# insert RZ
8323:
8324: 	fmov.l		%d1,%fpcr		# set FPCR
8325: 	fmov.l		&0x0,%fpsr		# clear FPSR
8326:
8327: 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8328:
8329: 	fmov.l		&0x0,%fpcr		# clear FPCR
8330: 	fabs.x		%fp1			# make absolute value
8331: 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
8332: 	fbge.w		fmul_normal_exit	# no; no underflow occurred
8333: 	bra.w		fmul_unfl		# yes, underflow occurred
8334:
8335: ################################################################################
8336:
8337: #
8338: # Multiply: inputs are not both normalized; what are they?
8339: # d1 = DTAG<<3 | STAG; dispatch through tbl_fmul_op (rows = dst tag,
8340: # columns = src tag; table entries are "DST x SRC").
8341: #
8342: fmul_not_norm:
8343: 	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
8344: 	jmp		(tbl_fmul_op.b,%pc,%d1.w)
8345:
8346: 	swbeg		&48
8347: tbl_fmul_op:
8348: 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8349: 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8350: 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8351: 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8352: 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8353: 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8354: 	short		tbl_fmul_op	- tbl_fmul_op #
8355: 	short		tbl_fmul_op	- tbl_fmul_op #
8356:
8357: 	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
8358: 	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
8359: 	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
8360: 	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
8361: 	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
8362: 	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
8363: 	short		tbl_fmul_op	- tbl_fmul_op #
8364: 	short		tbl_fmul_op	- tbl_fmul_op #
8365:
8366: 	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
8367: 	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
8368: 	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
8369: 	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
8370: 	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
8371: 	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
8372: 	short		tbl_fmul_op	- tbl_fmul_op #
8373: 	short		tbl_fmul_op	- tbl_fmul_op #
8374:
8375: 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
8376: 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
8377: 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
8378: 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
8379: 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
8380: 	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
8381: 	short		tbl_fmul_op	- tbl_fmul_op #
8382: 	short		tbl_fmul_op	- tbl_fmul_op #
8383:
8384: 	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
8385: 	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
8386: 	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
8387: 	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
8388: 	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
8389: 	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
8390: 	short		tbl_fmul_op	- tbl_fmul_op #
8391: 	short		tbl_fmul_op	- tbl_fmul_op #
8392:
8393: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
8394: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
8395: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
8396: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
8397: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
8398: 	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
8399: 	short		tbl_fmul_op	- tbl_fmul_op #
8400: 	short		tbl_fmul_op	- tbl_fmul_op #
8401:
8402: # long-branch trampolines to the shared special-result handlers
8403: fmul_res_operr:
8404: 	bra.l		res_operr
8405: fmul_res_snan:
8406: 	bra.l		res_snan
8407: fmul_res_qnan:
8408: 	bra.l		res_qnan
8406:
8407: #
8408: # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8409: # Result is a signed zero; sign = XOR of operand signs.
8410: #
8411: 	global		fmul_zero		# global for fsglmul
8412: fmul_zero:
8413: 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8414: 	mov.b		DST_EX(%a1),%d1
8415: 	eor.b		%d0,%d1
8416: 	bpl.b		fmul_zero_p		# result ZERO is pos.
8417: fmul_zero_n:
8418: 	fmov.s		&0x80000000,%fp0	# load -ZERO
8419: 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8420: 	rts
8421: fmul_zero_p:
8422: 	fmov.s		&0x00000000,%fp0	# load +ZERO
8423: 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
8424: 	rts
8425:
8426: #
8427: # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8428: #
8429: # Note: The j-bit for an infinity is a don't-care. However, to be
8430: # strictly compatible w/ the 68881/882, we make sure to return an
8431: # INF w/ the j-bit set if the input INF j-bit was set. Destination
8432: # INFs take priority.
8433: #
8434: 	global		fmul_inf_dst		# global for fsglmul
8435: fmul_inf_dst:
8436: 	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
8437: 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8438: 	mov.b		DST_EX(%a1),%d1
8439: 	eor.b		%d0,%d1
8440: 	bpl.b		fmul_inf_dst_p		# result INF is pos.
8441: fmul_inf_dst_n:
8442: 	fabs.x		%fp0			# clear result sign
8443: 	fneg.x		%fp0			# set result sign
8444: 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8445: 	rts
8446: fmul_inf_dst_p:
8447: 	fabs.x		%fp0			# clear result sign
8448: 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
8449: 	rts
8450:
8451: # src is the INF: load it, then share fmul_inf_dst's sign/ccode tail
8452: 	global		fmul_inf_src		# global for fsglmul
8453: fmul_inf_src:
8454: 	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
8455: 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8456: 	mov.b		DST_EX(%a1),%d1
8457: 	eor.b		%d0,%d1
8458: 	bpl.b		fmul_inf_dst_p		# result INF is pos.
8459: 	bra.b		fmul_inf_dst_n
8458:
8459: #########################################################################
8460: # XDEF **************************************************************** #
8461: # fin(): emulates the fmove instruction #
8462: # fsin(): emulates the fsmove instruction #
8463: # fdin(): emulates the fdmove instruction #
8464: # #
8465: # XREF **************************************************************** #
8466: # norm() - normalize mantissa for EXOP on denorm #
8467: # scale_to_zero_src() - scale src exponent to zero #
8468: # ovf_res() - return default overflow result #
8469: # unf_res() - return default underflow result #
8470: # res_qnan_1op() - return QNAN result #
8471: # res_snan_1op() - return SNAN result #
8472: # #
8473: # INPUT *************************************************************** #
8474: # a0 = pointer to extended precision source operand #
8475: # d0 = round prec/mode #
8476: # #
8477: # OUTPUT ************************************************************** #
8478: # fp0 = result #
8479: # fp1 = EXOP (if exception occurred) #
8480: # #
8481: # ALGORITHM *********************************************************** #
8482: # Handle NANs, infinities, and zeroes as special cases. Divide #
8483: # norms into extended, single, and double precision. #
8484: # Norms can be emulated w/ a regular fmove instruction. For #
8485: # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8486: # if the result would have overflowed/underflowed. If so, use unf_res() #
8487: # or ovf_res() to return the default result. Also return EXOP if #
8488: # exception is enabled. If no exception, return the default result. #
8489: # Unnorms don't pass through here. #
8490: # #
8491: #########################################################################
8492:
# fsin/fdin: FSMOVE/FDMOVE entry points. Force the rounding precision
# field of d0 to single/double, then fall into the common fin path.
# (Despite the name, fsin here is single-precision fmove, NOT sine.)
8493: 	global		fsin
8494: fsin:
8495: 	andi.b		&0x30,%d0		# clear rnd prec
8496: 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
8497: 	bra.b		fin
8498:
8499: 	global		fdin
8500: fdin:
8501: 	andi.b		&0x30,%d0		# clear rnd prec
8502: 	ori.b		&d_mode*0x10,%d0	# insert dbl precision; fall through to fin
8503:
# fin: emulate FMOVE-in. In: a0 = src operand ptr, d0 = rnd prec/mode.
# Out: fp0 = result (fp1 = EXOP if an enabled exception occurred).
8504: 	global		fin
8505: fin:
8506: 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8507:
8508: 	mov.b		STAG(%a6),%d1		# fetch src optype tag
8509: 	bne.w		fin_not_norm		# optimize on non-norm input
8510:
8511: #
8512: # FP MOVE IN: NORMs and DENORMs ONLY!
8513: #
8514: fin_norm:
8515: 	andi.b		&0xc0,%d0		# is precision extended?
8516: 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8517:
8518: #
8519: # precision selected is extended. so...we cannot get an underflow
8520: # or overflow because of rounding to the correct precision. so...
8521: # skip the scaling and unscaling...
8522: #
8523: 	tst.b		SRC_EX(%a0)		# is the operand negative?
8524: 	bpl.b		fin_norm_done		# no
8525: 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8526: fin_norm_done:
8527: 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8528: 	rts
8529:
8530: #
8531: # for an extended precision DENORM, the UNFL exception bit is set
8532: # the accrued bit is NOT set in this instance(no inexactness!)
8533: #
8534: fin_denorm:
8535: 	andi.b		&0xc0,%d0		# is precision extended?
8536: 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8537:
8538: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8539: 	tst.b		SRC_EX(%a0)		# is the operand negative?
8540: 	bpl.b		fin_denorm_done		# no
8541: 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8542: fin_denorm_done:
8543: 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8544: 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8545: 	bne.b		fin_denorm_unfl_ena	# yes
8546: 	rts
8547:
8548: #
8549: # the input is an extended DENORM and underflow is enabled in the FPCR.
8550: # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8551: # exponent and insert back into the operand.
8552: #
8553: fin_denorm_unfl_ena:
8554: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8555: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8556: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8557: 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
8558: 	bsr.l		norm			# normalize result
8559: 	neg.w		%d0			# new exponent = -(shft val)
8560: 	addi.w		&0x6000,%d0		# add new bias to exponent
8561: 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8562: 	andi.w		&0x8000,%d1		# keep old sign
8563: 	andi.w		&0x7fff,%d0		# clear sign position
8564: 	or.w		%d1,%d0			# concat new exp,old sign
8565: 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
8566: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8567: 	rts
8568:
8569: #
8570: # operand is to be rounded to single or double precision
8571: #
8572: fin_not_ext:
8573: 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
8574: 	bne.b		fin_dbl
8575:
8576: #
8577: # operand is to be rounded to single precision
8578: #
8579: fin_sgl:
8580: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8581: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8582: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8583: 	bsr.l		scale_to_zero_src	# calculate scale factor
8584:
8585: 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
8586: 	bge.w		fin_sd_unfl		# yes; go handle underflow
8587: 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
8588: 	beq.w		fin_sd_may_ovfl		# maybe; go check
8589: 	blt.w		fin_sd_ovfl		# yes; go handle overflow
8590:
8591: #
8592: # operand will NOT overflow or underflow when moved into the fp reg file
8593: #
8594: fin_sd_normal:
8595: 	fmov.l		&0x0,%fpsr		# clear FPSR
8596: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8597:
8598: 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8599:
8600: 	fmov.l		%fpsr,%d1		# save FPSR
8601: 	fmov.l		&0x0,%fpcr		# clear FPCR
8602:
8603: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8604:
8605: # un-scale the result exponent by the saved scale factor (still in d0)
8606: fin_sd_normal_exit:
8607: 	mov.l		%d2,-(%sp)		# save d2
8608: 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8609: 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8610: 	mov.w		%d1,%d2			# make a copy
8611: 	andi.l		&0x7fff,%d1		# strip sign
8612: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8613: 	andi.w		&0x8000,%d2		# keep old sign
8614: 	or.w		%d1,%d2			# concat old sign,new exponent
8615: 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
8616: 	mov.l		(%sp)+,%d2		# restore d2
8617: 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8618: 	rts
8619:
8620: #
8621: # operand is to be rounded to double precision
8622: #
8623: fin_dbl:
8624: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8625: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8626: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8627: 	bsr.l		scale_to_zero_src	# calculate scale factor
8628:
8629: 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
8630: 	bge.w		fin_sd_unfl		# yes; go handle underflow
8631: 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
8632: 	beq.w		fin_sd_may_ovfl		# maybe; go check
8633: 	blt.w		fin_sd_ovfl		# yes; go handle overflow
8634: 	bra.w		fin_sd_normal		# no; go handle normalized op
8634:
8635: #
8636: # operand WILL underflow when moved in to the fp register file
8637: #
8638: fin_sd_unfl:
8639: 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8640:
8641: 	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
8642: 	bpl.b		fin_sd_unfl_tst
8643: 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8644:
8645: # if underflow or inexact is enabled, then go calculate the EXOP first.
8646: fin_sd_unfl_tst:
8647: 	mov.b		FPCR_ENABLE(%a6),%d1
8648: 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8649: 	bne.b		fin_sd_unfl_ena		# yes
8650:
8651: fin_sd_unfl_dis:
8652: 	lea		FP_SCR0(%a6),%a0	# pass: result addr
8653: 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8654: 	bsr.l		unf_res			# calculate default result
8655: 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8656: 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8657: 	rts
8658:
8659: #
8660: # operand will underflow AND underflow or inexact is enabled.
8661: # therefore, we must return the result rounded to extended precision.
8662: #
8663: fin_sd_unfl_ena:
8664: 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8665: 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8666: 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
8667:
8668: 	mov.l		%d2,-(%sp)		# save d2
8669: 	mov.w		%d1,%d2			# make a copy
8670: 	andi.l		&0x7fff,%d1		# strip sign
8671: 	sub.l		%d0,%d1			# subtract scale factor
8672: 	andi.w		&0x8000,%d2		# extract old sign
8673: 	addi.l		&0x6000,%d1		# add new bias
8674: 	andi.w		&0x7fff,%d1
8675: 	or.w		%d1,%d2			# concat old sign,new exp
8676: 	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
8677: 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
8678: 	mov.l		(%sp)+,%d2		# restore d2
8679: 	bra.b		fin_sd_unfl_dis
8680:
8681: #
8682: # operand WILL overflow.
8683: #
8684: fin_sd_ovfl:
8685: 	fmov.l		&0x0,%fpsr		# clear FPSR
8686: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8687:
8688: 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8689:
8690: 	fmov.l		&0x0,%fpcr		# clear FPCR
8691: 	fmov.l		%fpsr,%d1		# save FPSR
8692:
8693: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8694:
8695: fin_sd_ovfl_tst:
8696: 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8697:
8698: 	mov.b		FPCR_ENABLE(%a6),%d1
8699: 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8700: 	bne.b		fin_sd_ovfl_ena		# yes
8701:
8702: #
8703: # OVFL is not enabled; therefore, we must create the default result by
8704: # calling ovf_res().
8705: #
8706: fin_sd_ovfl_dis:
8707: 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8708: 	sne		%d1			# set sign param accordingly
8709: 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
8710: 	bsr.l		ovf_res			# calculate default result
8711: 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8712: 	fmovm.x		(%a0),&0x80		# return default result in fp0
8713: 	rts
8714:
8715: #
8716: # OVFL is enabled.
8717: # the INEX2 bit has already been updated by the round to the correct precision.
8718: # now, round to extended(and don't alter the FPSR).
8719: #
8720: fin_sd_ovfl_ena:
8721: 	mov.l		%d2,-(%sp)		# save d2
8722: 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8723: 	mov.l		%d1,%d2			# make a copy
8724: 	andi.l		&0x7fff,%d1		# strip sign
8725: 	andi.w		&0x8000,%d2		# keep old sign
8726: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8727: 	sub.l		&0x6000,%d1		# subtract bias
8728: 	andi.w		&0x7fff,%d1
8729: 	or.w		%d2,%d1
8730: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8731: 	mov.l		(%sp)+,%d2		# restore d2
8732: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8733: 	bra.b		fin_sd_ovfl_dis
8734:
8735: #
8736: # the move in MAY overflow. so...
8737: #
8738: fin_sd_may_ovfl:
8739: 	fmov.l		&0x0,%fpsr		# clear FPSR
8740: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8741:
8742: 	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
8743:
8744: 	fmov.l		%fpsr,%d1		# save status
8745: 	fmov.l		&0x0,%fpcr		# clear FPCR
8746:
8747: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8748:
8749: 	fabs.x		%fp0,%fp1		# make a copy of result
8750: 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8751: 	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
8752:
8753: # no, it didn't overflow; we have correct result
8754: 	bra.w		fin_sd_normal_exit
8755:
8756: ##########################################################################
8757:
8758: #
8759: # operand is not a NORM: check its optype and branch accordingly
8760: #
8761: fin_not_norm:
8762: 	cmpi.b		%d1,&DENORM		# weed out DENORM
8763: 	beq.w		fin_denorm
8764: 	cmpi.b		%d1,&SNAN		# weed out SNANs
8765: 	beq.l		res_snan_1op
8766: 	cmpi.b		%d1,&QNAN		# weed out QNANs
8767: 	beq.l		res_qnan_1op
8768:
8769: #
8770: # do the fmove in; at this point, only possible ops are ZERO and INF.
8771: # use fmov to determine ccodes.
8772: # prec:mode should be zero at this point but it won't affect answer anyways.
8773: #
8774: 	fmov.x		SRC(%a0),%fp0		# do fmove in
8775: 	fmov.l		%fpsr,%d0		# no exceptions possible
8776: 	rol.l		&0x8,%d0		# put ccodes in lo byte
8777: 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
8778: 	rts
8779:
8780: #########################################################################
8781: # XDEF **************************************************************** #
8782: # fdiv(): emulates the fdiv instruction #
8783: # fsdiv(): emulates the fsdiv instruction #
8784: # fddiv(): emulates the fddiv instruction #
8785: # #
8786: # XREF **************************************************************** #
8787: # scale_to_zero_src() - scale src exponent to zero #
8788: # scale_to_zero_dst() - scale dst exponent to zero #
8789: # unf_res() - return default underflow result #
8790: # ovf_res() - return default overflow result #
8791: # res_qnan() - return QNAN result #
8792: # res_snan() - return SNAN result #
8793: # #
8794: # INPUT *************************************************************** #
8795: # a0 = pointer to extended precision source operand #
8796: # a1 = pointer to extended precision destination operand #
8797: #	d0 = rnd prec,mode						#
8798: # #
8799: # OUTPUT ************************************************************** #
8800: # fp0 = result #
8801: # fp1 = EXOP (if exception occurred) #
8802: # #
8803: # ALGORITHM *********************************************************** #
8804: # Handle NANs, infinities, and zeroes as special cases. Divide #
8805: # norms/denorms into ext/sgl/dbl precision. #
8806: # For norms/denorms, scale the exponents such that a divide #
8807: # instruction won't cause an exception. Use the regular fdiv to #
8808: # compute a result. Check if the regular operands would have taken #
8809: # an exception. If so, return the default overflow/underflow result #
8810: # and return the EXOP if exceptions are enabled. Else, scale the #
8811: # result operand to the proper exponent. #
8812: # #
8813: #########################################################################
8814:
# per-precision scale-factor thresholds for fdiv unfl/ovfl detection,
# indexed by precision (0=ext, 1=sgl, 2=dbl)
8815: 	align		0x10
8816: tbl_fdiv_unfl:
8817: 	long		0x3fff - 0x0000		# ext_unfl
8818: 	long		0x3fff - 0x3f81		# sgl_unfl
8819: 	long		0x3fff - 0x3c01		# dbl_unfl
8820:
8821: tbl_fdiv_ovfl:
8822: 	long		0x3fff - 0x7ffe		# ext overflow exponent
8823: 	long		0x3fff - 0x407e		# sgl overflow exponent
8824: 	long		0x3fff - 0x43fe		# dbl overflow exponent
8825:
8826: # fsdiv/fddiv: force single/double rounding precision, then fall into fdiv
8827: 	global		fsdiv
8828: fsdiv:
8829: 	andi.b		&0x30,%d0		# clear rnd prec
8830: 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8831: 	bra.b		fdiv
8832:
8833: 	global		fddiv
8834: fddiv:
8835: 	andi.b		&0x30,%d0		# clear rnd prec
8836: 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; fall through to fdiv
8836:
# fdiv: emulate FDIV. In: a0 = src operand ptr, a1 = dst operand ptr,
# d0 = rnd prec/mode. Computes dst / src.
8837: 	global		fdiv
8838: fdiv:
8839: 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8840:
8841: 	clr.w		%d1
8842: 	mov.b		DTAG(%a6),%d1
8843: 	lsl.b		&0x3,%d1
8844: 	or.b		STAG(%a6),%d1		# combine src tags: d1 = DTAG<<3 | STAG
8845:
8846: 	bne.w		fdiv_not_norm		# optimize on non-norm input
8847:
8848: #
8849: # DIVIDE: NORMs and DENORMs ONLY!
8850: #
8851: fdiv_norm:
8852: 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8853: 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8854: 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8855:
8856: 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8857: 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8858: 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8859:
8860: 	bsr.l		scale_to_zero_src	# scale src exponent
8861: 	mov.l		%d0,-(%sp)		# save scale factor 1
8862:
8863: 	bsr.l		scale_to_zero_dst	# scale dst exponent
8864:
8865: 	neg.l		(%sp)			# SCALE FACTOR = scale1 - scale2
8866: 	add.l		%d0,(%sp)
8867:
8868: 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8869: 	lsr.b		&0x6,%d1		# shift to lo bits (0=ext,1=sgl,2=dbl)
8870: 	mov.l		(%sp)+,%d0		# load S.F.
8871: 	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8872: 	ble.w		fdiv_may_ovfl		# result will overflow
8873:
8874: 	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8875: 	beq.w		fdiv_may_unfl		# maybe
8876: 	bgt.w		fdiv_unfl		# yes; go handle underflow
8877:
# fdiv_normal: result will neither overflow nor underflow; divide with the
# user's prec/mode, then un-scale the result exponent by the scale factor.
8878: fdiv_normal:
8879: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8880:
8881: 	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
8882: 	fmov.l		&0x0,%fpsr		# clear FPSR
8883:
8884: 	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
8885:
8886: 	fmov.l		%fpsr,%d1		# save FPSR
8887: 	fmov.l		&0x0,%fpcr		# clear FPCR
8888:
8889: 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8890:
8891: fdiv_normal_exit:
8892: 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
8893: 	mov.l		%d2,-(%sp)		# store d2
8894: 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8895: 	mov.l		%d1,%d2			# make a copy
8896: 	andi.l		&0x7fff,%d1		# strip sign
8897: 	andi.w		&0x8000,%d2		# keep old sign
8898: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8899: 	or.w		%d2,%d1			# concat old sign,new exp
8900: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8901: 	mov.l		(%sp)+,%d2		# restore d2
8902: 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8903: 	rts
8904:
8905: # per-precision max exponents used by fdiv_may_ovfl's post-divide check
8905: tbl_fdiv_ovfl2:
8906: 	long		0x7fff
8907: 	long		0x407f
8908: 	long		0x43ff
8909:
8910: fdiv_no_ovfl:
8911: 	mov.l		(%sp)+,%d0		# restore scale factor
8912: 	bra.b		fdiv_normal_exit
8913:
# fdiv_may_ovfl: divide, then check the scaled result exponent against
# tbl_fdiv_ovfl2 to decide whether overflow actually occurred.
8914: fdiv_may_ovfl:
8915: 	mov.l		%d0,-(%sp)		# save scale factor
8916:
8917: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8918:
8919: 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8920: 	fmov.l		&0x0,%fpsr		# set FPSR
8921:
8922: 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8923:
8924: 	fmov.l		%fpsr,%d0
8925: 	fmov.l		&0x0,%fpcr
8926:
8927: 	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
8928:
8929: 	fmovm.x		&0x01,-(%sp)		# save result to stack
8930: 	mov.w		(%sp),%d0		# fetch new exponent
8931: 	add.l		&0xc,%sp		# clear result from stack
8932: 	andi.l		&0x7fff,%d0		# strip sign
8933: 	sub.l		(%sp),%d0		# new exp = exp - scale factor
8934: 	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
8935: 	blt.b		fdiv_no_ovfl
8936: 	mov.l		(%sp)+,%d0		# overflow; pop scale factor
8937:
8938: fdiv_ovfl_tst:
8939: 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8940:
8941: 	mov.b		FPCR_ENABLE(%a6),%d1
8942: 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8943: 	bne.b		fdiv_ovfl_ena		# yes
8944:
8945: fdiv_ovfl_dis:
8946: 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8947: 	sne		%d1			# set sign param accordingly
8948: 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
8949: 	bsr.l		ovf_res			# calculate default result
8950: 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
8951: 	fmovm.x		(%a0),&0x80		# return default result in fp0
8952: 	rts
8953:
# OVFL/INEX enabled: create the EXOP (extended-prec result biased by -0x6000)
8954: fdiv_ovfl_ena:
8955: 	mov.l		L_SCR3(%a6),%d1
8956: 	andi.b		&0xc0,%d1		# is precision extended?
8957: 	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
8958:
8959: fdiv_ovfl_ena_cont:
8960: 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8961:
8962: 	mov.l		%d2,-(%sp)		# save d2
8963: 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8964: 	mov.w		%d1,%d2			# make a copy
8965: 	andi.l		&0x7fff,%d1		# strip sign
8966: 	sub.l		%d0,%d1			# new exp = exp - scale factor
8967: 	subi.l		&0x6000,%d1		# subtract bias
8968: 	andi.w		&0x7fff,%d1		# clear sign bit
8969: 	andi.w		&0x8000,%d2		# keep old sign
8970: 	or.w		%d2,%d1			# concat old sign,new exp
8971: 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8972: 	mov.l		(%sp)+,%d2		# restore d2
8973: 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8974: 	bra.b		fdiv_ovfl_dis
8975:
8976: # sgl/dbl precision: redo the divide in extended prec (rnd mode only)
8976: fdiv_ovfl_ena_sd:
8977: 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8978:
8979: 	mov.l		L_SCR3(%a6),%d1
8980: 	andi.b		&0x30,%d1		# keep rnd mode
8981: 	fmov.l		%d1,%fpcr		# set FPCR
8982:
8983: 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8984:
8985: 	fmov.l		&0x0,%fpcr		# clear FPCR
8986: 	bra.b		fdiv_ovfl_ena_cont
8987:
# the divide result WILL underflow: compute it in RZ, then build the
# default result (and an EXOP in fp1 if UNFL/INEX are enabled).
8988: fdiv_unfl:
8989: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8990:
8991: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8992:
8993: fmov.l &rz_mode*0x10,%fpcr # set FPCR: round-to-zero
8994: fmov.l &0x0,%fpsr # clear FPSR
8995:
8996: fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8997:
8998: fmov.l %fpsr,%d1 # save status
8999: fmov.l &0x0,%fpcr # clear FPCR
9000:
9001: or.l %d1,USER_FPSR(%a6) # save INEX2,N
9002:
9003: mov.b FPCR_ENABLE(%a6),%d1
9004: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9005: bne.b fdiv_unfl_ena # yes
9006:
# UNFL disabled: let unf_res() build the IEEE default underflow result
9007: fdiv_unfl_dis:
9008: fmovm.x &0x80,FP_SCR0(%a6) # store out result
9009:
9010: lea FP_SCR0(%a6),%a0 # pass: result addr
9011: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9012: bsr.l unf_res # calculate default result
9013: or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
9014: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9015: rts
9016:
9017: #
9018: # UNFL is enabled.
9019: #
9020: fdiv_unfl_ena:
9021: fmovm.x FP_SCR1(%a6),&0x40 # load dst op
9022:
9023: mov.l L_SCR3(%a6),%d1
9024: andi.b &0xc0,%d1 # is precision extended?
9025: bne.b fdiv_unfl_ena_sd # no, sgl or dbl
9026:
9027: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9028:
9029: fdiv_unfl_ena_cont:
9030: fmov.l &0x0,%fpsr # clear FPSR
9031:
9032: fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9033:
9034: fmov.l &0x0,%fpcr # clear FPCR
9035:
# bias the EXOP exponent by 0x6000 per the 060FPSP exceptional-operand format
9036: fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
9037: mov.l %d2,-(%sp) # save d2
9038: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9039: mov.l %d1,%d2 # make a copy
9040: andi.l &0x7fff,%d1 # strip sign
9041: andi.w &0x8000,%d2 # keep old sign
9042: sub.l %d0,%d1 # add scale factor
9043: addi.l &0x6000,%d1 # add bias
9044: andi.w &0x7fff,%d1 # clear sign bit
9045: or.w %d2,%d1 # concat old sign,new exp
9046: mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
9047: mov.l (%sp)+,%d2 # restore d2
9048: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9049: bra.w fdiv_unfl_dis
9050:
# sgl/dbl precision: use rnd mode only (extended precision) for the EXOP
9051: fdiv_unfl_ena_sd:
9052: mov.l L_SCR3(%a6),%d1
9053: andi.b &0x30,%d1 # use only rnd mode
9054: fmov.l %d1,%fpcr # set FPCR
9055:
9056: bra.b fdiv_unfl_ena_cont
9057:
9058: #
9059: # the divide operation MAY underflow:
9060: #
9061: fdiv_may_unfl:
9062: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
9063:
9064: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9065: fmov.l &0x0,%fpsr # clear FPSR
9066:
9067: fdiv.x FP_SCR0(%a6),%fp0 # execute divide
9068:
9069: fmov.l %fpsr,%d1 # save status
9070: fmov.l &0x0,%fpcr # clear FPCR
9071:
9072: or.l %d1,USER_FPSR(%a6) # save INEX2,N
9073:
# compare |result| against 1.0 to decide whether underflow occurred
9074: fabs.x %fp0,%fp1 # make a copy of result
9075: fcmp.b %fp1,&0x1 # is |result| > 1.b?
9076: fbgt.w fdiv_normal_exit # no; no underflow occurred
9077: fblt.w fdiv_unfl # yes; underflow occurred
9078:
9079: #
9080: # we still don't know if underflow occurred. result is ~ equal to 1. but,
9081: # we don't know if the result was an underflow that rounded up to a 1
9082: # or a normalized number that rounded down to a 1. so, redo the entire
9083: # operation using RZ as the rounding mode to see what the pre-rounded
9084: # result is. this case should be relatively rare.
9085: #
9086: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
9087:
9088: mov.l L_SCR3(%a6),%d1
9089: andi.b &0xc0,%d1 # keep rnd prec
9090: ori.b &rz_mode*0x10,%d1 # insert RZ
9091:
9092: fmov.l %d1,%fpcr # set FPCR
9093: fmov.l &0x0,%fpsr # clear FPSR
9094:
9095: fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9096:
9097: fmov.l &0x0,%fpcr # clear FPCR
9098: fabs.x %fp1 # make absolute value
9099: fcmp.b %fp1,&0x1 # is |result| < 1.b?
9100: fbge.w fdiv_normal_exit # no; no underflow occurred
9101: bra.w fdiv_unfl # yes; underflow occurred
9102:
9103: ############################################################################
9104:
9105: #
9106: # Divide: inputs are not both normalized; what are they?
9107: #
9108: fdiv_not_norm:
9109: mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit handler offset (op-type index in %d1)
9110: jmp (tbl_fdiv_op.b,%pc,%d1.w*1) # jump to op-type-specific handler
9111:
# dispatch table: rows = dst operand type, columns = src operand type
9112: swbeg &48
9113: tbl_fdiv_op:
9114: short fdiv_norm - tbl_fdiv_op # NORM / NORM
9115: short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
9116: short fdiv_zero_load - tbl_fdiv_op # NORM / INF
9117: short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
9118: short fdiv_norm - tbl_fdiv_op # NORM / DENORM
9119: short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
9120: short tbl_fdiv_op - tbl_fdiv_op #
9121: short tbl_fdiv_op - tbl_fdiv_op #
9122:
9123: short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
9124: short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
9125: short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
9126: short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
9127: short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
9128: short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
9129: short tbl_fdiv_op - tbl_fdiv_op #
9130: short tbl_fdiv_op - tbl_fdiv_op #
9131:
9132: short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
9133: short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
9134: short fdiv_res_operr - tbl_fdiv_op # INF / INF
9135: short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
9136: short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
9137: short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
9138: short tbl_fdiv_op - tbl_fdiv_op #
9139: short tbl_fdiv_op - tbl_fdiv_op #
9140:
9141: short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
9142: short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
9143: short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
9144: short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
9145: short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
9146: short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
9147: short tbl_fdiv_op - tbl_fdiv_op #
9148: short tbl_fdiv_op - tbl_fdiv_op #
9149:
9150: short fdiv_norm - tbl_fdiv_op # DENORM / NORM
9151: short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
9152: short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
9153: short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
9154: short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
9155: short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
9156: short tbl_fdiv_op - tbl_fdiv_op #
9157: short tbl_fdiv_op - tbl_fdiv_op #
9158:
9159: short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
9160: short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
9161: short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
9162: short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
9163: short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
9164: short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
9165: short tbl_fdiv_op - tbl_fdiv_op #
9166: short tbl_fdiv_op - tbl_fdiv_op #
9167:
# trampolines to the shared NAN/operand-error result builders
9168: fdiv_res_qnan:
9169: bra.l res_qnan
9170: fdiv_res_snan:
9171: bra.l res_snan
9172: fdiv_res_operr:
9173: bra.l res_operr
9174:
# result is a ZERO whose sign is the XOR of the two input signs
9175: global fdiv_zero_load # global for fsgldiv
9176: fdiv_zero_load:
9177: mov.b SRC_EX(%a0),%d0 # result sign is exclusive
9178: mov.b DST_EX(%a1),%d1 # or of input signs.
9179: eor.b %d0,%d1
9180: bpl.b fdiv_zero_load_p # result is positive
9181: fmov.s &0x80000000,%fp0 # load a -ZERO
9182: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
9183: rts
9184: fdiv_zero_load_p:
9185: fmov.s &0x00000000,%fp0 # load a +ZERO
9186: mov.b &z_bmask,FPSR_CC(%a6) # set Z
9187: rts
9188:
9189: #
9190: # The destination was In Range and the source was a ZERO. The result,
9191: # therefore, is an INF w/ the proper sign.
9192: # So, determine the sign and return a new INF (w/ the j-bit cleared).
9193: #
9194: global fdiv_inf_load # global for fsgldiv
9195: fdiv_inf_load:
9196: ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
9197: mov.b SRC_EX(%a0),%d0 # load both signs
9198: mov.b DST_EX(%a1),%d1
9199: eor.b %d0,%d1 # result sign = XOR of input signs
9200: bpl.b fdiv_inf_load_p # result is positive
9201: fmov.s &0xff800000,%fp0 # make result -INF
9202: mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9203: rts
9204: fdiv_inf_load_p:
9205: fmov.s &0x7f800000,%fp0 # make result +INF
9206: mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9207: rts
9208:
9209: #
9210: # The destination was an INF w/ an In Range or ZERO source, the result is
9211: # an INF w/ the proper sign.
9212: # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9213: # dst INF is set, then the j-bit of the result INF is also set).
9214: #
9215: global fdiv_inf_dst # global for fsgldiv
9216: fdiv_inf_dst:
9217: mov.b DST_EX(%a1),%d0 # load both signs
9218: mov.b SRC_EX(%a0),%d1
9219: eor.b %d0,%d1 # result sign = XOR of input signs
9220: bpl.b fdiv_inf_dst_p # result is positive
9221:
# return the dst INF (j-bit preserved) with the sign forced negative
9222: fmovm.x DST(%a1),&0x80 # return result in fp0
9223: fabs.x %fp0 # clear sign bit
9224: fneg.x %fp0 # set sign bit
9225: mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9226: rts
9227:
9228: fdiv_inf_dst_p:
9229: fmovm.x DST(%a1),&0x80 # return result in fp0
9230: fabs.x %fp0 # return positive INF
9231: mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9232: rts
9233:
9234: #########################################################################
9235: # XDEF **************************************************************** #
9236: # fneg(): emulates the fneg instruction #
9237: # fsneg(): emulates the fsneg instruction #
9238: # fdneg(): emulates the fdneg instruction #
9239: # #
9240: # XREF **************************************************************** #
9241: # norm() - normalize a denorm to provide EXOP #
9242: # scale_to_zero_src() - scale sgl/dbl source exponent #
9243: # ovf_res() - return default overflow result #
9244: # unf_res() - return default underflow result #
9245: # res_qnan_1op() - return QNAN result #
9246: # res_snan_1op() - return SNAN result #
9247: # #
9248: # INPUT *************************************************************** #
9249: # a0 = pointer to extended precision source operand #
9250: # d0 = rnd prec,mode #
9251: # #
9252: # OUTPUT ************************************************************** #
9253: # fp0 = result #
9254: # fp1 = EXOP (if exception occurred) #
9255: # #
9256: # ALGORITHM *********************************************************** #
9257: # Handle NANs, zeroes, and infinities as special cases. Separate #
9258: # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
9259: # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
9260: # and an actual fneg performed to see if overflow/underflow would have #
9261: # occurred. If so, return default underflow/overflow result. Else, #
9262: # scale the result exponent and return result. FPSR gets set based on #
9263: # the result value. #
9264: # #
9265: #########################################################################
9266:
# fsneg/fdneg force the rounding precision, then fall into the common fneg
9267: global fsneg
9268: fsneg:
9269: andi.b &0x30,%d0 # clear rnd prec
9270: ori.b &s_mode*0x10,%d0 # insert sgl precision
9271: bra.b fneg
9272:
9273: global fdneg
9274: fdneg:
9275: andi.b &0x30,%d0 # clear rnd prec
9276: ori.b &d_mode*0x10,%d0 # insert dbl prec
9277:
9278: global fneg
9279: fneg:
9280: mov.l %d0,L_SCR3(%a6) # store rnd info
9281: mov.b STAG(%a6),%d1 # fetch src operand type tag
9282: bne.w fneg_not_norm # optimize on non-norm input
9283:
9284: #
9285: # NEGATE SIGN : norms and denorms ONLY!
9286: #
9287: fneg_norm:
9288: andi.b &0xc0,%d0 # is precision extended?
9289: bne.w fneg_not_ext # no; go handle sgl or dbl
9290:
9291: #
9292: # precision selected is extended. so...we can not get an underflow
9293: # or overflow because of rounding to the correct precision. so...
9294: # skip the scaling and unscaling...
9295: #
9296: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9297: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9298: mov.w SRC_EX(%a0),%d0
9299: eori.w &0x8000,%d0 # negate sign
9300: bpl.b fneg_norm_load # sign is positive
9301: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9302: fneg_norm_load:
9303: mov.w %d0,FP_SCR0_EX(%a6)
9304: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9305: rts
9306:
9307: #
9308: # for an extended precision DENORM, the UNFL exception bit is set
9309: # the accrued bit is NOT set in this instance(no inexactness!)
9310: #
9311: fneg_denorm:
9312: andi.b &0xc0,%d0 # is precision extended?
9313: bne.b fneg_not_ext # no; go handle sgl or dbl
9314:
9315: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9316:
9317: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9318: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9319: mov.w SRC_EX(%a0),%d0
9320: eori.w &0x8000,%d0 # negate sign
9321: bpl.b fneg_denorm_done # no
9322: mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
9323: fneg_denorm_done:
9324: mov.w %d0,FP_SCR0_EX(%a6)
9325: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9326:
9327: btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9328: bne.b fneg_ext_unfl_ena # yes
9329: rts
9330:
9331: #
9332: # the input is an extended DENORM and underflow is enabled in the FPCR.
9333: # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9334: # exponent and insert back into the operand.
9335: #
9336: fneg_ext_unfl_ena:
9337: lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9338: bsr.l norm # normalize result
9339: neg.w %d0 # new exponent = -(shft val)
9340: addi.w &0x6000,%d0 # add new bias to exponent
9341: mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9342: andi.w &0x8000,%d1 # keep old sign
9343: andi.w &0x7fff,%d0 # clear sign position
9344: or.w %d1,%d0 # concat old sign, new exponent
9345: mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9346: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9347: rts
9348:
9349: #
9350: # operand is either single or double
9351: #
9352: fneg_not_ext:
9353: cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9354: bne.b fneg_dbl
9355:
9356: #
9357: # operand is to be rounded to single precision
9358: #
9359: fneg_sgl:
9360: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9361: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9362: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9363: bsr.l scale_to_zero_src # calculate scale factor
9364:
# compare scale factor against sgl exponent range to predict unfl/ovfl
9365: cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9366: bge.w fneg_sd_unfl # yes; go handle underflow
9367: cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9368: beq.w fneg_sd_may_ovfl # maybe; go check
9369: blt.w fneg_sd_ovfl # yes; go handle overflow
9370:
9371: #
9372: # operand will NOT overflow or underflow when moved in to the fp reg file
9373: #
9374: fneg_sd_normal:
9375: fmov.l &0x0,%fpsr # clear FPSR
9376: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9377:
9378: fneg.x FP_SCR0(%a6),%fp0 # perform negation
9379:
9380: fmov.l %fpsr,%d1 # save FPSR
9381: fmov.l &0x0,%fpcr # clear FPCR
9382:
9383: or.l %d1,USER_FPSR(%a6) # save INEX2,N
9384:
# undo the pre-scaling: fold the scale factor back into the result exponent
9385: fneg_sd_normal_exit:
9386: mov.l %d2,-(%sp) # save d2
9387: fmovm.x &0x80,FP_SCR0(%a6) # store out result
9388: mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9389: mov.w %d1,%d2 # make a copy
9390: andi.l &0x7fff,%d1 # strip sign
9391: sub.l %d0,%d1 # add scale factor
9392: andi.w &0x8000,%d2 # keep old sign
9393: or.w %d1,%d2 # concat old sign,new exp
9394: mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
9395: mov.l (%sp)+,%d2 # restore d2
9396: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9397: rts
9398:
9399: #
9400: # operand is to be rounded to double precision
9401: #
9402: fneg_dbl:
9403: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9404: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9405: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9406: bsr.l scale_to_zero_src # calculate scale factor
9407:
9408: cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
9409: bge.b fneg_sd_unfl # yes; go handle underflow
9410: cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
9411: beq.w fneg_sd_may_ovfl # maybe; go check
9412: blt.w fneg_sd_ovfl # yes; go handle overflow
9413: bra.w fneg_sd_normal # no; go handle normalized op
9414:
9415: #
9416: # operand WILL underflow when moved in to the fp register file
9417: #
9418: fneg_sd_unfl:
9419: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9420:
9421: eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
9422: bpl.b fneg_sd_unfl_tst
9423: bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
9424:
9425: # if underflow or inexact is enabled, go calculate EXOP first.
9426: fneg_sd_unfl_tst:
9427: mov.b FPCR_ENABLE(%a6),%d1
9428: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9429: bne.b fneg_sd_unfl_ena # yes
9430:
# UNFL disabled: let unf_res() build the IEEE default underflow result
9431: fneg_sd_unfl_dis:
9432: lea FP_SCR0(%a6),%a0 # pass: result addr
9433: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9434: bsr.l unf_res # calculate default result
9435: or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
9436: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9437: rts
9438:
9439: #
9440: # operand will underflow AND underflow is enabled.
9441: # therefore, we must return the result rounded to extended precision.
9442: #
9443: fneg_sd_unfl_ena:
9444: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9445: mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9446: mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
9447:
# bias the EXOP exponent by 0x6000 per the exceptional-operand format
9448: mov.l %d2,-(%sp) # save d2
9449: mov.l %d1,%d2 # make a copy
9450: andi.l &0x7fff,%d1 # strip sign
9451: andi.w &0x8000,%d2 # keep old sign
9452: sub.l %d0,%d1 # subtract scale factor
9453: addi.l &0x6000,%d1 # add new bias
9454: andi.w &0x7fff,%d1 # clear sign bit
9455: or.w %d2,%d1 # concat new sign,new exp
9456: mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
9457: fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
9458: mov.l (%sp)+,%d2 # restore d2
9459: bra.b fneg_sd_unfl_dis
9460:
9461: #
9462: # operand WILL overflow.
9463: #
9464: fneg_sd_ovfl:
9465: fmov.l &0x0,%fpsr # clear FPSR
9466: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9467:
9468: fneg.x FP_SCR0(%a6),%fp0 # perform negation
9469:
9470: fmov.l &0x0,%fpcr # clear FPCR
9471: fmov.l %fpsr,%d1 # save FPSR
9472:
9473: or.l %d1,USER_FPSR(%a6) # save INEX2,N
9474:
9475: fneg_sd_ovfl_tst:
9476: or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9477:
9478: mov.b FPCR_ENABLE(%a6),%d1
9479: andi.b &0x13,%d1 # is OVFL or INEX enabled?
9480: bne.b fneg_sd_ovfl_ena # yes
9481:
9482: #
9483: # OVFL is not enabled; therefore, we must create the default result by
9484: # calling ovf_res().
9485: #
9486: fneg_sd_ovfl_dis:
9487: btst &neg_bit,FPSR_CC(%a6) # is result negative?
9488: sne %d1 # set sign param accordingly
9489: mov.l L_SCR3(%a6),%d0 # pass: prec,mode
9490: bsr.l ovf_res # calculate default result
9491: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
9492: fmovm.x (%a0),&0x80 # return default result in fp0
9493: rts
9494:
9495: #
9496: # OVFL is enabled.
9497: # the INEX2 bit has already been updated by the round to the correct precision.
9498: # now, round to extended(and don't alter the FPSR).
9499: #
9500: fneg_sd_ovfl_ena:
9501: mov.l %d2,-(%sp) # save d2
9502: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9503: mov.l %d1,%d2 # make a copy
9504: andi.l &0x7fff,%d1 # strip sign
9505: andi.w &0x8000,%d2 # keep old sign
9506: sub.l %d0,%d1 # add scale factor
9507: subi.l &0x6000,%d1 # subtract bias
9508: andi.w &0x7fff,%d1 # clear sign bit
9509: or.w %d2,%d1 # concat sign,exp
9510: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
9511: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9512: mov.l (%sp)+,%d2 # restore d2
9513: bra.b fneg_sd_ovfl_dis
9514:
9515: #
9516: # the move in MAY overflow. so...
9517: #
9518: fneg_sd_may_ovfl:
9519: fmov.l &0x0,%fpsr # clear FPSR
9520: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9521:
9522: fneg.x FP_SCR0(%a6),%fp0 # perform negation
9523:
9524: fmov.l %fpsr,%d1 # save status
9525: fmov.l &0x0,%fpcr # clear FPCR
9526:
9527: or.l %d1,USER_FPSR(%a6) # save INEX2,N
9528:
9529: fabs.x %fp0,%fp1 # make a copy of result
9530: fcmp.b %fp1,&0x2 # is |result| >= 2.b?
9531: fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
9532:
9533: # no, it didn't overflow; we have correct result
9534: bra.w fneg_sd_normal_exit
9535:
9536: ##########################################################################
9537:
9538: #
9539: # input is not normalized; what is it?
9540: #
9541: fneg_not_norm:
9542: cmpi.b %d1,&DENORM # weed out DENORM
9543: beq.w fneg_denorm
9544: cmpi.b %d1,&SNAN # weed out SNAN
9545: beq.l res_snan_1op
9546: cmpi.b %d1,&QNAN # weed out QNAN
9547: beq.l res_qnan_1op
9548:
9549: #
9550: # do the fneg; at this point, only possible ops are ZERO and INF.
9551: # use fneg to determine ccodes.
9552: # prec:mode should be zero at this point but it won't affect answer anyways.
9553: #
9554: fneg.x SRC_EX(%a0),%fp0 # do fneg
9555: fmov.l %fpsr,%d0
9556: rol.l &0x8,%d0 # put ccodes in lo byte
9557: mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
9558: rts
9559:
9560: #########################################################################
9561: # XDEF **************************************************************** #
9562: # ftst(): emulates the ftst instruction #
9563: # #
9564: # XREF **************************************************************** #
9565: # res{s,q}nan_1op() - set NAN result for monadic instruction #
9566: # #
9567: # INPUT *************************************************************** #
9568: # a0 = pointer to extended precision source operand #
9569: # #
9570: # OUTPUT ************************************************************** #
9571: # none #
9572: # #
9573: # ALGORITHM *********************************************************** #
9574: # Check the source operand tag (STAG) and set the FPSR according #
9575: # to the operand type and sign. #
9576: # #
9577: #########################################################################
9578:
9579: global ftst
9580: ftst:
9581: mov.b STAG(%a6),%d1 # fetch src operand type tag
9582: bne.b ftst_not_norm # optimize on non-norm input
9583:
9584: #
9585: # Norm:
9586: #
9587: ftst_norm:
9588: tst.b SRC_EX(%a0) # is operand negative?
9589: bmi.b ftst_norm_m # yes
9590: rts
9591: ftst_norm_m:
9592: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9593: rts
9594:
9595: #
9596: # input is not normalized; what is it?
9597: #
9598: ftst_not_norm:
9599: cmpi.b %d1,&ZERO # weed out ZERO
9600: beq.b ftst_zero
9601: cmpi.b %d1,&INF # weed out INF
9602: beq.b ftst_inf
9603: cmpi.b %d1,&SNAN # weed out SNAN
9604: beq.l res_snan_1op
9605: cmpi.b %d1,&QNAN # weed out QNAN
9606: beq.l res_qnan_1op
9607:
9608: #
9609: # Denorm:
9610: #
9611: ftst_denorm:
9612: tst.b SRC_EX(%a0) # is operand negative?
9613: bmi.b ftst_denorm_m # yes
9614: rts
9615: ftst_denorm_m:
9616: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9617: rts
9618:
9619: #
9620: # Infinity:
9621: #
9622: ftst_inf:
9623: tst.b SRC_EX(%a0) # is operand negative?
9624: bmi.b ftst_inf_m # yes
9625: ftst_inf_p:
9626: mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9627: rts
9628: ftst_inf_m:
9629: mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9630: rts
9631:
9632: #
9633: # Zero:
9634: #
9635: ftst_zero:
9636: tst.b SRC_EX(%a0) # is operand negative?
9637: bmi.b ftst_zero_m # yes
9638: ftst_zero_p:
9639: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9640: rts
9641: ftst_zero_m:
9642: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9643: rts
9644:
9645: #########################################################################
9646: # XDEF **************************************************************** #
9647: # fint(): emulates the fint instruction #
9648: # #
9649: # XREF **************************************************************** #
9650: # res_{s,q}nan_1op() - set NAN result for monadic operation #
9651: # #
9652: # INPUT *************************************************************** #
9653: # a0 = pointer to extended precision source operand #
9654: # d0 = round precision/mode #
9655: # #
9656: # OUTPUT ************************************************************** #
9657: # fp0 = result #
9658: # #
9659: # ALGORITHM *********************************************************** #
9660: # Separate according to operand type. Unnorms don't pass through #
9661: # here. For norms, load the rounding mode/prec, execute a "fint", then #
9662: # store the resulting FPSR bits. #
9663: # For denorms, force the j-bit to a one and do the same as for #
9664: # norms. Denorms are so low that the answer will either be a zero or a #
9665: # one. #
9666: # For zeroes/infs/NANs, return the same while setting the FPSR #
9667: # as appropriate. #
9668: # #
9669: #########################################################################
9670:
9671: global fint
9672: fint:
9673: mov.b STAG(%a6),%d1 # fetch src operand type tag
9674: bne.b fint_not_norm # optimize on non-norm input
9675:
9676: #
9677: # Norm:
9678: #
9679: fint_norm:
9680: andi.b &0x30,%d0 # set prec = ext
9681:
9682: fmov.l %d0,%fpcr # set FPCR
9683: fmov.l &0x0,%fpsr # clear FPSR
9684:
9685: fint.x SRC(%a0),%fp0 # execute fint
9686:
9687: fmov.l &0x0,%fpcr # clear FPCR
9688: fmov.l %fpsr,%d0 # save FPSR
9689: or.l %d0,USER_FPSR(%a6) # set exception bits
9690:
9691: rts
9692:
9693: #
9694: # input is not normalized; what is it?
9695: #
9696: fint_not_norm:
9697: cmpi.b %d1,&ZERO # weed out ZERO
9698: beq.b fint_zero
9699: cmpi.b %d1,&INF # weed out INF
9700: beq.b fint_inf
9701: cmpi.b %d1,&DENORM # weed out DENORM
9702: beq.b fint_denorm
9703: cmpi.b %d1,&SNAN # weed out SNAN
9704: beq.l res_snan_1op
9705: bra.l res_qnan_1op # weed out QNAN
9706:
9707: #
9708: # Denorm:
9709: #
9710: # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9711: # also, the INEX2 and AINEX exception bits will be set.
9712: # so, we could either set these manually or force the DENORM
9713: # to a very small NORM and ship it to the NORM routine.
9714: # I do the latter.
9715: #
9716: fint_denorm:
9717: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9718: mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9719: lea FP_SCR0(%a6),%a0 # pass ptr to fabricated NORM
9720: bra.b fint_norm
9721:
9722: #
9723: # Zero:
9724: #
9725: fint_zero:
9726: tst.b SRC_EX(%a0) # is ZERO negative?
9727: bmi.b fint_zero_m # yes
9728: fint_zero_p:
9729: fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9730: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9731: rts
9732: fint_zero_m:
9733: fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9734: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9735: rts
9736:
9737: #
9738: # Infinity:
9739: #
9740: fint_inf:
9741: fmovm.x SRC(%a0),&0x80 # return result in fp0
9742: tst.b SRC_EX(%a0) # is INF negative?
9743: bmi.b fint_inf_m # yes
9744: fint_inf_p:
9745: mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9746: rts
9747: fint_inf_m:
9748: mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9749: rts
9750:
9751: #########################################################################
9752: # XDEF **************************************************************** #
9753: # fintrz(): emulates the fintrz instruction #
9754: # #
9755: # XREF **************************************************************** #
9756: # res_{s,q}nan_1op() - set NAN result for monadic operation #
9757: # #
9758: # INPUT *************************************************************** #
9759: # a0 = pointer to extended precision source operand #
9760: # d0 = round precision/mode #
9761: # #
9762: # OUTPUT ************************************************************** #
9763: # fp0 = result #
9764: # #
9765: # ALGORITHM *********************************************************** #
9766: # Separate according to operand type. Unnorms don't pass through #
9767: # here. For norms, load the rounding mode/prec, execute a "fintrz", #
9768: # then store the resulting FPSR bits. #
9769: # For denorms, force the j-bit to a one and do the same as for #
9770: # norms. Denorms are so low that the answer will either be a zero or a #
9771: # one. #
9772: # For zeroes/infs/NANs, return the same while setting the FPSR #
9773: # as appropriate. #
9774: # #
9775: #########################################################################
9776:
9777: global fintrz
9778: fintrz:
9779: mov.b STAG(%a6),%d1 # fetch src operand type tag
9780: bne.b fintrz_not_norm # optimize on non-norm input
9781:
9782: #
9783: # Norm:
9784: #
9785: fintrz_norm:
9786: fmov.l &0x0,%fpsr # clear FPSR
9787:
9788: fintrz.x SRC(%a0),%fp0 # execute fintrz
9789:
9790: fmov.l %fpsr,%d0 # save FPSR
9791: or.l %d0,USER_FPSR(%a6) # set exception bits
9792:
9793: rts
9794:
9795: #
9796: # input is not normalized; what is it?
9797: #
9798: fintrz_not_norm:
9799: cmpi.b %d1,&ZERO # weed out ZERO
9800: beq.b fintrz_zero
9801: cmpi.b %d1,&INF # weed out INF
9802: beq.b fintrz_inf
9803: cmpi.b %d1,&DENORM # weed out DENORM
9804: beq.b fintrz_denorm
9805: cmpi.b %d1,&SNAN # weed out SNAN
9806: beq.l res_snan_1op
9807: bra.l res_qnan_1op # weed out QNAN
9808:
9809: #
9810: # Denorm:
9811: #
9812: # for DENORMs, the result will be (+/-)ZERO.
9813: # also, the INEX2 and AINEX exception bits will be set.
9814: # so, we could either set these manually or force the DENORM
9815: # to a very small NORM and ship it to the NORM routine.
9816: # I do the latter.
9817: #
9818: fintrz_denorm:
9819: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9820: mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9821: lea FP_SCR0(%a6),%a0 # pass ptr to fabricated NORM
9822: bra.b fintrz_norm
9823:
9824: #
9825: # Zero:
9826: #
9827: fintrz_zero:
9828: tst.b SRC_EX(%a0) # is ZERO negative?
9829: bmi.b fintrz_zero_m # yes
9830: fintrz_zero_p:
9831: fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9832: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9833: rts
9834: fintrz_zero_m:
9835: fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9836: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9837: rts
9838:
9839: #
9840: # Infinity:
9841: #
9842: fintrz_inf:
9843: fmovm.x SRC(%a0),&0x80 # return result in fp0
9844: tst.b SRC_EX(%a0) # is INF negative?
9845: bmi.b fintrz_inf_m # yes
9846: fintrz_inf_p:
9847: mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9848: rts
9849: fintrz_inf_m:
9850: mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9851: rts
9852:
9853: #########################################################################
9854: # XDEF **************************************************************** #
9855: # fabs(): emulates the fabs instruction #
9856: # fsabs(): emulates the fsabs instruction #
9857: # fdabs(): emulates the fdabs instruction #
9858: # #
9859: # XREF **************************************************************** #
9860: # norm() - normalize denorm mantissa to provide EXOP #
9861: # scale_to_zero_src() - make exponent. = 0; get scale factor #
9862: # unf_res() - calculate underflow result #
9863: # ovf_res() - calculate overflow result #
9864: # res_{s,q}nan_1op() - set NAN result for monadic operation #
9865: # #
9866: # INPUT *************************************************************** #
9867: # a0 = pointer to extended precision source operand #
9868: # d0 = rnd precision/mode #
9869: # #
9870: # OUTPUT ************************************************************** #
9871: # fp0 = result #
9872: # fp1 = EXOP (if exception occurred) #
9873: # #
9874: # ALGORITHM *********************************************************** #
9875: # Handle NANs, infinities, and zeroes as special cases. Divide #
9876: # norms into extended, single, and double precision. #
9877: # Simply clear sign for extended precision norm. Ext prec denorm #
9878: # gets an EXOP created for it since it's an underflow. #
9879: # Double and single precision can overflow and underflow. First, #
9880: # scale the operand such that the exponent is zero. Perform an "fabs" #
9881: # using the correct rnd mode/prec. Check to see if the original #
9882: # exponent would take an exception. If so, use unf_res() or ovf_res() #
9883: # to calculate the default result. Also, create the EXOP for the #
9884: # exceptional case. If no exception should occur, insert the correct #
9885: # result exponent and return. #
9886: # Unnorms don't pass through here. #
9887: # #
9888: #########################################################################
9889:
9890: global fsabs
9891: fsabs:
9892: andi.b &0x30,%d0 # clear rnd prec
9893: ori.b &s_mode*0x10,%d0 # insert sgl precision
9894: bra.b fabs
9895:
9896: global fdabs
9897: fdabs:
9898: andi.b &0x30,%d0 # clear rnd prec (keep rnd mode bits only)
9899: ori.b &d_mode*0x10,%d0 # insert dbl precision; fall through into fabs
9900:
9901: global fabs
9902: fabs:
9903: mov.l %d0,L_SCR3(%a6) # store rnd info
9904: mov.b STAG(%a6),%d1 # fetch source operand tag
9905: bne.w fabs_not_norm # optimize on non-norm input
9906:
9907: #
9908: # ABSOLUTE VALUE: norms and denorms ONLY!
9909: #
9910: fabs_norm:
9911: andi.b &0xc0,%d0 # is precision extended?
9912: bne.b fabs_not_ext # no; go handle sgl or dbl
9913:
9914: #
9915: # precision selected is extended. so...we cannot get an underflow
9916: # or overflow because of rounding to the correct precision. so...
9917: # skip the scaling and unscaling...
9918: #
9919: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9920: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9921: mov.w SRC_EX(%a0),%d1
9922: bclr &15,%d1 # force absolute value
9923: mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
9924: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9925: rts
9926:
9927: #
9928: # for an extended precision DENORM, the UNFL exception bit is set
9929: # the accrued bit is NOT set in this instance(no inexactness!)
9930: #
9931: fabs_denorm:
9932: andi.b &0xc0,%d0 # is precision extended?
9933: bne.b fabs_not_ext # no
9934:
9935: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9936:
9937: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9938: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9939: mov.w SRC_EX(%a0),%d0
9940: bclr &15,%d0 # clear sign
9941: mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
9942:
9943: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9944:
9945: btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9946: bne.b fabs_ext_unfl_ena
9947: rts
9948:
9949: #
9950: # the input is an extended DENORM and underflow is enabled in the FPCR.
9951: # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9952: # exponent and insert back into the operand.
9953: #
9954: fabs_ext_unfl_ena:
9955: lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9956: bsr.l norm # normalize result
9957: neg.w %d0 # new exponent = -(shft val)
9958: addi.w &0x6000,%d0 # add new bias to exponent
9959: mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9960: andi.w &0x8000,%d1 # keep old sign
9961: andi.w &0x7fff,%d0 # clear sign position
9962: or.w %d1,%d0 # concat old sign, new exponent
9963: mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9964: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9965: rts
9966:
9967: #
9968: # operand is either single or double
9969: #
9970: fabs_not_ext:
9971: cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9972: bne.b fabs_dbl
9973:
9974: #
9975: # operand is to be rounded to single precision
9976: #
9977: fabs_sgl:
9978: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9979: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9980: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9981: bsr.l scale_to_zero_src # calculate scale factor
9982:
9983: cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9984: bge.w fabs_sd_unfl # yes; go handle underflow
9985: cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9986: beq.w fabs_sd_may_ovfl # maybe; go check
9987: blt.w fabs_sd_ovfl # yes; go handle overflow
9988:
9989: #
9990: # operand will NOT overflow or underflow when moved in to the fp reg file
9991: #
9992: fabs_sd_normal:
9993: fmov.l &0x0,%fpsr # clear FPSR
9994: fmov.l L_SCR3(%a6),%fpcr # set FPCR
9995:
9996: fabs.x FP_SCR0(%a6),%fp0 # perform absolute
9997:
9998: fmov.l %fpsr,%d1 # save FPSR
9999: fmov.l &0x0,%fpcr # clear FPCR
10000:
10001: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10002:
10003: fabs_sd_normal_exit:
10004: mov.l %d2,-(%sp) # save d2
10005: fmovm.x &0x80,FP_SCR0(%a6) # store out result
10006: mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
10007: mov.l %d1,%d2 # make a copy
10008: andi.l &0x7fff,%d1 # strip sign
10009: sub.l %d0,%d1 # subtract scale factor (undo scaling)
10010: andi.w &0x8000,%d2 # keep old sign
10011: or.w %d1,%d2 # concat old sign,new exp
10012: mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
10013: mov.l (%sp)+,%d2 # restore d2
10014: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10015: rts
10016:
10017: #
10018: # operand is to be rounded to double precision
10019: #
10020: fabs_dbl:
10021: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10022: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10023: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10024: bsr.l scale_to_zero_src # calculate scale factor
10025:
10026: cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
10027: bge.b fabs_sd_unfl # yes; go handle underflow
10028: cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
10029: beq.w fabs_sd_may_ovfl # maybe; go check
10030: blt.w fabs_sd_ovfl # yes; go handle overflow
10031: bra.w fabs_sd_normal # no; go handle normalized op
10032:
10033: #
10034: # operand WILL underflow when moved in to the fp register file
10035: #
10036: fabs_sd_unfl:
10037: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10038:
10039: bclr &0x7,FP_SCR0_EX(%a6) # force absolute value (bit 7 of EX byte = sign)
10040:
10041: # if underflow or inexact is enabled, go calculate EXOP first.
10042: mov.b FPCR_ENABLE(%a6),%d1
10043: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10044: bne.b fabs_sd_unfl_ena # yes
10045:
10046: fabs_sd_unfl_dis:
10047: lea FP_SCR0(%a6),%a0 # pass: result addr
10048: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10049: bsr.l unf_res # calculate default result
10050: or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
10051: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10052: rts
10053:
10054: #
10055: # operand will underflow AND underflow is enabled.
10056: # therefore, we must return the result rounded to extended precision.
10057: #
10058: fabs_sd_unfl_ena:
10059: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10060: mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10061: mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
10062:
10063: mov.l %d2,-(%sp) # save d2
10064: mov.l %d1,%d2 # make a copy
10065: andi.l &0x7fff,%d1 # strip sign
10066: andi.w &0x8000,%d2 # keep old sign
10067: sub.l %d0,%d1 # subtract scale factor
10068: addi.l &0x6000,%d1 # add new bias
10069: andi.w &0x7fff,%d1
10070: or.w %d2,%d1 # concat new sign,new exp
10071: mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
10072: fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
10073: mov.l (%sp)+,%d2 # restore d2
10074: bra.b fabs_sd_unfl_dis
10075:
10076: #
10077: # operand WILL overflow.
10078: #
10079: fabs_sd_ovfl:
10080: fmov.l &0x0,%fpsr # clear FPSR
10081: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10082:
10083: fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10084:
10085: fmov.l &0x0,%fpcr # clear FPCR
10086: fmov.l %fpsr,%d1 # save FPSR
10087:
10088: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10089:
10090: fabs_sd_ovfl_tst:
10091: or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10092:
10093: mov.b FPCR_ENABLE(%a6),%d1
10094: andi.b &0x13,%d1 # is OVFL or INEX enabled?
10095: bne.b fabs_sd_ovfl_ena # yes
10096:
10097: #
10098: # OVFL is not enabled; therefore, we must create the default result by
10099: # calling ovf_res().
10100: #
10101: fabs_sd_ovfl_dis:
10102: btst &neg_bit,FPSR_CC(%a6) # is result negative?
10103: sne %d1 # set sign param accordingly
10104: mov.l L_SCR3(%a6),%d0 # pass: prec,mode
10105: bsr.l ovf_res # calculate default result
10106: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10107: fmovm.x (%a0),&0x80 # return default result in fp0
10108: rts
10109:
10110: #
10111: # OVFL is enabled.
10112: # the INEX2 bit has already been updated by the round to the correct precision.
10113: # now, round to extended(and don't alter the FPSR).
10114: #
10115: fabs_sd_ovfl_ena:
10116: mov.l %d2,-(%sp) # save d2
10117: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10118: mov.l %d1,%d2 # make a copy
10119: andi.l &0x7fff,%d1 # strip sign
10120: andi.w &0x8000,%d2 # keep old sign
10121: sub.l %d0,%d1 # subtract scale factor (undo scaling)
10122: subi.l &0x6000,%d1 # subtract bias
10123: andi.w &0x7fff,%d1
10124: or.w %d2,%d1 # concat sign,exp
10125: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10126: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10127: mov.l (%sp)+,%d2 # restore d2
10128: bra.b fabs_sd_ovfl_dis
10129:
10130: #
10131: # the move in MAY overflow. so...
10132: #
10133: fabs_sd_may_ovfl:
10134: fmov.l &0x0,%fpsr # clear FPSR
10135: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10136:
10137: fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10138:
10139: fmov.l %fpsr,%d1 # save status
10140: fmov.l &0x0,%fpcr # clear FPCR
10141:
10142: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10143:
10144: fabs.x %fp0,%fp1 # make a copy of result
10145: fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10146: fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
10147:
10148: # no, it didn't overflow; we have correct result
10149: bra.w fabs_sd_normal_exit
10150:
10151: ##########################################################################
10152:
10153: #
10154: # input is not normalized; what is it?
10155: #
10156: fabs_not_norm:
10157: cmpi.b %d1,&DENORM # weed out DENORM
10158: beq.w fabs_denorm
10159: cmpi.b %d1,&SNAN # weed out SNAN
10160: beq.l res_snan_1op
10161: cmpi.b %d1,&QNAN # weed out QNAN
10162: beq.l res_qnan_1op
10163:
10164: fabs.x SRC(%a0),%fp0 # force absolute value
10165:
10166: cmpi.b %d1,&INF # weed out INF
10167: beq.b fabs_inf
10168: fabs_zero:
10169: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10170: rts
10171: fabs_inf:
10172: mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10173: rts
10174:
10175: #########################################################################
10176: # XDEF **************************************************************** #
10177: # fcmp(): fp compare op routine #
10178: # #
10179: # XREF **************************************************************** #
10180: # res_qnan() - return QNAN result #
10181: # res_snan() - return SNAN result #
10182: # #
10183: # INPUT *************************************************************** #
10184: # a0 = pointer to extended precision source operand #
10185: # a1 = pointer to extended precision destination operand #
10186: # d0 = round prec/mode #
10187: # #
10188: # OUTPUT ************************************************************** #
10189: # None #
10190: # #
10191: # ALGORITHM *********************************************************** #
10192: # Handle NANs and denorms as special cases. For everything else, #
10193: # just use the actual fcmp instruction to produce the correct condition #
10194: # codes. #
10195: # #
10196: #########################################################################
10197:
10198: global fcmp
10199: fcmp:
10200: clr.w %d1
10201: mov.b DTAG(%a6),%d1
10202: lsl.b &0x3,%d1
10203: or.b STAG(%a6),%d1 # d1 = (DTAG<<3)|STAG: operand class pair index
10204: bne.b fcmp_not_norm # optimize on non-norm input
10205:
10206: #
10207: # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10208: #
10209: fcmp_norm:
10210: fmovm.x DST(%a1),&0x80 # load dst op
10211:
10212: fcmp.x %fp0,SRC(%a0) # do compare
10213:
10214: fmov.l %fpsr,%d0 # save FPSR
10215: rol.l &0x8,%d0 # extract ccode bits
10216: mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
10217:
10218: rts
10219:
10220: #
10221: # fcmp: inputs are not both normalized; what are they?
10222: #
10223: fcmp_not_norm:
10224: mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch table offset for class pair
10225: jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
10226:
10227: swbeg &48
10228: tbl_fcmp_op:
10229: short fcmp_norm - tbl_fcmp_op # NORM - NORM
10230: short fcmp_norm - tbl_fcmp_op # NORM - ZERO
10231: short fcmp_norm - tbl_fcmp_op # NORM - INF
10232: short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
10233: short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
10234: short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
10235: short tbl_fcmp_op - tbl_fcmp_op #
10236: short tbl_fcmp_op - tbl_fcmp_op #
10237:
10238: short fcmp_norm - tbl_fcmp_op # ZERO - NORM
10239: short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
10240: short fcmp_norm - tbl_fcmp_op # ZERO - INF
10241: short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
10242: short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
10243: short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
10244: short tbl_fcmp_op - tbl_fcmp_op #
10245: short tbl_fcmp_op - tbl_fcmp_op #
10246:
10247: short fcmp_norm - tbl_fcmp_op # INF - NORM
10248: short fcmp_norm - tbl_fcmp_op # INF - ZERO
10249: short fcmp_norm - tbl_fcmp_op # INF - INF
10250: short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
10251: short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
10252: short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
10253: short tbl_fcmp_op - tbl_fcmp_op #
10254: short tbl_fcmp_op - tbl_fcmp_op #
10255:
10256: short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
10257: short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
10258: short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
10259: short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
10260: short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
10261: short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
10262: short tbl_fcmp_op - tbl_fcmp_op #
10263: short tbl_fcmp_op - tbl_fcmp_op #
10264:
10265: short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
10266: short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
10267: short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
10268: short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
10269: short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
10270: short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
10271: short tbl_fcmp_op - tbl_fcmp_op #
10272: short tbl_fcmp_op - tbl_fcmp_op #
10273:
10274: short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
10275: short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
10276: short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
10277: short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
10278: short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
10279: short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
10280: short tbl_fcmp_op - tbl_fcmp_op #
10281: short tbl_fcmp_op - tbl_fcmp_op #
10282:
10283: # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10284: # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10285: fcmp_res_qnan:
10286: bsr.l res_qnan
10287: andi.b &0xf7,FPSR_CC(%a6) # clear 'N' ccode bit
10288: rts
10289: fcmp_res_snan:
10290: bsr.l res_snan
10291: andi.b &0xf7,FPSR_CC(%a6) # clear 'N' ccode bit
10292: rts
10293:
10294: #
10295: # DENORMs are a little more difficult.
10296: # If you have 2 DENORMs, then you can just force the j-bit to a one
10297: # and use the fcmp_norm routine.
10298: # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10299: # and use the fcmp_norm routine.
10300: # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10301: # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10302: # (1) signs are (+) and the DENORM is the dst or
10303: # (2) signs are (-) and the DENORM is the src
10304: #
10305:
10306: fcmp_dnrm_s:
10307: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10308: mov.l SRC_HI(%a0),%d0
10309: bset &31,%d0 # DENORM src; make into small norm
10310: mov.l %d0,FP_SCR0_HI(%a6)
10311: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10312: lea FP_SCR0(%a6),%a0
10313: bra.w fcmp_norm
10314:
10315: fcmp_dnrm_d:
10316: mov.l DST_EX(%a1),FP_SCR0_EX(%a6) # NOTE(review): .l copies exp word + pad (src path uses .w) - verify intentional
10317: mov.l DST_HI(%a1),%d0
10318: bset &31,%d0 # DENORM dst; make into small norm
10319: mov.l %d0,FP_SCR0_HI(%a6)
10320: mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
10321: lea FP_SCR0(%a6),%a1
10322: bra.w fcmp_norm
10323:
10324: fcmp_dnrm_sd:
10325: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10326: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10327: mov.l DST_HI(%a1),%d0
10328: bset &31,%d0 # DENORM dst; make into small norm
10329: mov.l %d0,FP_SCR1_HI(%a6)
10330: mov.l SRC_HI(%a0),%d0
10331: bset &31,%d0 # DENORM src; make into small norm
10332: mov.l %d0,FP_SCR0_HI(%a6)
10333: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10334: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10335: lea FP_SCR1(%a6),%a1
10336: lea FP_SCR0(%a6),%a0
10337: bra.w fcmp_norm
10338:
10339: fcmp_nrm_dnrm:
10340: mov.b SRC_EX(%a0),%d0 # determine if like signs
10341: mov.b DST_EX(%a1),%d1
10342: eor.b %d0,%d1
10343: bmi.w fcmp_dnrm_s
10344:
10345: # signs are the same, so must determine the answer ourselves.
10346: tst.b %d0 # is src op negative?
10347: bmi.b fcmp_nrm_dnrm_m # yes
10348: rts
10349: fcmp_nrm_dnrm_m:
10350: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10351: rts
10352:
10353: fcmp_dnrm_nrm:
10354: mov.b SRC_EX(%a0),%d0 # determine if like signs
10355: mov.b DST_EX(%a1),%d1
10356: eor.b %d0,%d1
10357: bmi.w fcmp_dnrm_d
10358:
10359: # signs are the same, so must determine the answer ourselves.
10360: tst.b %d0 # is src op negative?
10361: bpl.b fcmp_dnrm_nrm_m # no
10362: rts
10363: fcmp_dnrm_nrm_m:
10364: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10365: rts
10366:
10367: #########################################################################
10368: # XDEF **************************************************************** #
10369: # fsglmul(): emulates the fsglmul instruction #
10370: # #
10371: # XREF **************************************************************** #
10372: # scale_to_zero_src() - scale src exponent to zero #
10373: # scale_to_zero_dst() - scale dst exponent to zero #
10374: # unf_res4() - return default underflow result for sglop #
10375: # ovf_res() - return default overflow result #
10376: # res_qnan() - return QNAN result #
10377: # res_snan() - return SNAN result #
10378: # #
10379: # INPUT *************************************************************** #
10380: # a0 = pointer to extended precision source operand #
10381: # a1 = pointer to extended precision destination operand #
10382: # d0 rnd prec,mode #
10383: # #
10384: # OUTPUT ************************************************************** #
10385: # fp0 = result #
10386: # fp1 = EXOP (if exception occurred) #
10387: # #
10388: # ALGORITHM *********************************************************** #
10389: # Handle NANs, infinities, and zeroes as special cases. Divide #
10390: # norms/denorms into ext/sgl/dbl precision. #
10391: # For norms/denorms, scale the exponents such that a multiply #
10392: # instruction won't cause an exception. Use the regular fsglmul to #
10393: # compute a result. Check if the regular operands would have taken #
10394: # an exception. If so, return the default overflow/underflow result #
10395: # and return the EXOP if exceptions are enabled. Else, scale the #
10396: # result operand to the proper exponent. #
10397: # #
10398: #########################################################################
10399:
10400: global fsglmul
10401: fsglmul:
10402: mov.l %d0,L_SCR3(%a6) # store rnd info
10403:
10404: clr.w %d1
10405: mov.b DTAG(%a6),%d1
10406: lsl.b &0x3,%d1
10407: or.b STAG(%a6),%d1 # d1 = (DTAG<<3)|STAG: operand class pair index
10408:
10409: bne.w fsglmul_not_norm # optimize on non-norm input
10410:
10411: fsglmul_norm:
10412: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10413: mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10414: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10415:
10416: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10417: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10418: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10419:
10420: bsr.l scale_to_zero_src # scale exponent
10421: mov.l %d0,-(%sp) # save scale factor 1
10422:
10423: bsr.l scale_to_zero_dst # scale dst exponent
10424:
10425: add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
10426:
10427: cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
10428: beq.w fsglmul_may_ovfl # result may rnd to overflow
10429: blt.w fsglmul_ovfl # result will overflow
10430:
10431: cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
10432: beq.w fsglmul_may_unfl # result may rnd to no unfl
10433: bgt.w fsglmul_unfl # result will underflow
10434:
10435: fsglmul_normal:
10436: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10437:
10438: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10439: fmov.l &0x0,%fpsr # clear FPSR
10440:
10441: fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10442:
10443: fmov.l %fpsr,%d1 # save status
10444: fmov.l &0x0,%fpcr # clear FPCR
10445:
10446: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10447:
10448: fsglmul_normal_exit:
10449: fmovm.x &0x80,FP_SCR0(%a6) # store out result
10450: mov.l %d2,-(%sp) # save d2
10451: mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10452: mov.l %d1,%d2 # make a copy
10453: andi.l &0x7fff,%d1 # strip sign
10454: andi.w &0x8000,%d2 # keep old sign
10455: sub.l %d0,%d1 # subtract scale factor (undo scaling)
10456: or.w %d2,%d1 # concat old sign,new exp
10457: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10458: mov.l (%sp)+,%d2 # restore d2
10459: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10460: rts
10461:
10462: fsglmul_ovfl:
10463: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10464:
10465: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10466: fmov.l &0x0,%fpsr # clear FPSR
10467:
10468: fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10469:
10470: fmov.l %fpsr,%d1 # save status
10471: fmov.l &0x0,%fpcr # clear FPCR
10472:
10473: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10474:
10475: fsglmul_ovfl_tst:
10476:
10477: # save setting this until now because this is where fsglmul_may_ovfl may jump in
10478: or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10479:
10480: mov.b FPCR_ENABLE(%a6),%d1
10481: andi.b &0x13,%d1 # is OVFL or INEX enabled?
10482: bne.b fsglmul_ovfl_ena # yes
10483:
10484: fsglmul_ovfl_dis:
10485: btst &neg_bit,FPSR_CC(%a6) # is result negative?
10486: sne %d1 # set sign param accordingly
10487: mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10488: andi.b &0x30,%d0 # force prec = ext
10489: bsr.l ovf_res # calculate default result
10490: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10491: fmovm.x (%a0),&0x80 # return default result in fp0
10492: rts
10493:
10494: fsglmul_ovfl_ena:
10495: fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10496:
10497: mov.l %d2,-(%sp) # save d2
10498: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10499: mov.l %d1,%d2 # make a copy
10500: andi.l &0x7fff,%d1 # strip sign
10501: sub.l %d0,%d1 # subtract scale factor (undo scaling)
10502: subi.l &0x6000,%d1 # subtract bias
10503: andi.w &0x7fff,%d1
10504: andi.w &0x8000,%d2 # keep old sign
10505: or.w %d2,%d1 # concat old sign,new exp
10506: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10507: mov.l (%sp)+,%d2 # restore d2
10508: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10509: bra.b fsglmul_ovfl_dis
10510:
10511: fsglmul_may_ovfl:
10512: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10513:
10514: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10515: fmov.l &0x0,%fpsr # clear FPSR
10516:
10517: fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10518:
10519: fmov.l %fpsr,%d1 # save status
10520: fmov.l &0x0,%fpcr # clear FPCR
10521:
10522: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10523:
10524: fabs.x %fp0,%fp1 # make a copy of result
10525: fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10526: fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
10527:
10528: # no, it didn't overflow; we have correct result
10529: bra.w fsglmul_normal_exit
10530:
10531: fsglmul_unfl:
10532: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10533:
10534: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10535:
10536: fmov.l &rz_mode*0x10,%fpcr # set FPCR (round-to-zero)
10537: fmov.l &0x0,%fpsr # clear FPSR
10538:
10539: fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10540:
10541: fmov.l %fpsr,%d1 # save status
10542: fmov.l &0x0,%fpcr # clear FPCR
10543:
10544: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10545:
10546: mov.b FPCR_ENABLE(%a6),%d1
10547: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10548: bne.b fsglmul_unfl_ena # yes
10549:
10550: fsglmul_unfl_dis:
10551: fmovm.x &0x80,FP_SCR0(%a6) # store out result
10552:
10553: lea FP_SCR0(%a6),%a0 # pass: result addr
10554: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10555: bsr.l unf_res4 # calculate default result
10556: or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10557: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10558: rts
10559:
10560: #
10561: # UNFL is enabled.
10562: #
10563: fsglmul_unfl_ena:
10564: fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10565:
10566: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10567: fmov.l &0x0,%fpsr # clear FPSR
10568:
10569: fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10570:
10571: fmov.l &0x0,%fpcr # clear FPCR
10572:
10573: fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10574: mov.l %d2,-(%sp) # save d2
10575: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10576: mov.l %d1,%d2 # make a copy
10577: andi.l &0x7fff,%d1 # strip sign
10578: andi.w &0x8000,%d2 # keep old sign
10579: sub.l %d0,%d1 # subtract scale factor (undo scaling)
10580: addi.l &0x6000,%d1 # add bias
10581: andi.w &0x7fff,%d1
10582: or.w %d2,%d1 # concat old sign,new exp
10583: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10584: mov.l (%sp)+,%d2 # restore d2
10585: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10586: bra.w fsglmul_unfl_dis
10587:
10588: fsglmul_may_unfl:
10589: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10590:
10591: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10592: fmov.l &0x0,%fpsr # clear FPSR
10593:
10594: fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10595:
10596: fmov.l %fpsr,%d1 # save status
10597: fmov.l &0x0,%fpcr # clear FPCR
10598:
10599: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10600:
10601: fabs.x %fp0,%fp1 # make a copy of result
10602: fcmp.b %fp1,&0x2 # is |result| > 2.b?
10603: fbgt.w fsglmul_normal_exit # no; no underflow occurred
10604: fblt.w fsglmul_unfl # yes; underflow occurred
10605:
10606: #
10607: # we still don't know if underflow occurred. result is ~ equal to 2. but,
10608: # we don't know if the result was an underflow that rounded up to a 2 or
10609: # a normalized number that rounded down to a 2. so, redo the entire operation
10610: # using RZ as the rounding mode to see what the pre-rounded result is.
10611: # this case should be relatively rare.
10612: #
10613: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10614:
10615: mov.l L_SCR3(%a6),%d1
10616: andi.b &0xc0,%d1 # keep rnd prec
10617: ori.b &rz_mode*0x10,%d1 # insert RZ
10618:
10619: fmov.l %d1,%fpcr # set FPCR
10620: fmov.l &0x0,%fpsr # clear FPSR
10621:
10622: fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10623:
10624: fmov.l &0x0,%fpcr # clear FPCR
10625: fabs.x %fp1 # make absolute value
10626: fcmp.b %fp1,&0x2 # is |result| < 2.b?
10627: fbge.w fsglmul_normal_exit # no; no underflow occurred
10628: bra.w fsglmul_unfl # yes, underflow occurred
10629:
10630: ##############################################################################
10631:
10632: #
10633: # Single Precision Multiply: inputs are not both normalized; what are they?
10634: #
10635: fsglmul_not_norm:
10636: mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # fetch table offset for class pair
10637: jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
10638:
10639: swbeg &48
10640: tbl_fsglmul_op:
10641: short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
10642: short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
10643: short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
10644: short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
10645: short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
10646: short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
10647: short tbl_fsglmul_op - tbl_fsglmul_op #
10648: short tbl_fsglmul_op - tbl_fsglmul_op #
10649:
10650: short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
10651: short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
10652: short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
10653: short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
10654: short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
10655: short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
10656: short tbl_fsglmul_op - tbl_fsglmul_op #
10657: short tbl_fsglmul_op - tbl_fsglmul_op #
10658:
10659: short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
10660: short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
10661: short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
10662: short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
10663: short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
10664: short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
10665: short tbl_fsglmul_op - tbl_fsglmul_op #
10666: short tbl_fsglmul_op - tbl_fsglmul_op #
10667:
10668: short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
10669: short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
10670: short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
10671: short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
10672: short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
10673: short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
10674: short tbl_fsglmul_op - tbl_fsglmul_op #
10675: short tbl_fsglmul_op - tbl_fsglmul_op #
10676:
10677: short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
10678: short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
10679: short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
10680: short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
10681: short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
10682: short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
10683: short tbl_fsglmul_op - tbl_fsglmul_op #
10684: short tbl_fsglmul_op - tbl_fsglmul_op #
10685:
10686: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
10687: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
10688: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
10689: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
10690: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
10691: short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
10692: short tbl_fsglmul_op - tbl_fsglmul_op #
10693: short tbl_fsglmul_op - tbl_fsglmul_op #
10694:
10695: fsglmul_res_operr:
10696: bra.l res_operr
10697: fsglmul_res_snan:
10698: bra.l res_snan
10699: fsglmul_res_qnan:
10700: bra.l res_qnan
10701: fsglmul_zero:
10702: bra.l fmul_zero # share fmul's zero handler
10703: fsglmul_inf_src:
10704: bra.l fmul_inf_src # share fmul's INF-src handler
10705: fsglmul_inf_dst:
10706: bra.l fmul_inf_dst # share fmul's INF-dst handler
10707:
10708: #########################################################################
10709: # XDEF **************************************************************** #
10710: # fsgldiv(): emulates the fsgldiv instruction #
10711: # #
10712: # XREF **************************************************************** #
10713: # scale_to_zero_src() - scale src exponent to zero #
10714: # scale_to_zero_dst() - scale dst exponent to zero #
10715: # unf_res4() - return default underflow result for sglop #
10716: # ovf_res() - return default overflow result #
10717: # res_qnan() - return QNAN result #
10718: # res_snan() - return SNAN result #
10719: # #
10720: # INPUT *************************************************************** #
10721: # a0 = pointer to extended precision source operand #
10722: # a1 = pointer to extended precision destination operand #
10723: # d0 rnd prec,mode #
10724: # #
10725: # OUTPUT ************************************************************** #
10726: # fp0 = result #
10727: # fp1 = EXOP (if exception occurred) #
10728: # #
10729: # ALGORITHM *********************************************************** #
10730: # Handle NANs, infinities, and zeroes as special cases. Divide #
10731: # norms/denorms into ext/sgl/dbl precision. #
10732: # For norms/denorms, scale the exponents such that a divide #
10733: # instruction won't cause an exception. Use the regular fsgldiv to #
10734: # compute a result. Check if the regular operands would have taken #
10735: # an exception. If so, return the default overflow/underflow result #
10736: # and return the EXOP if exceptions are enabled. Else, scale the #
10737: # result operand to the proper exponent. #
10738: # #
10739: #########################################################################
10740:
#
# fsgldiv: emulate the FSGLDIV (single-precision divide) instruction.
# In:  d0 = rounding precision/mode, a0 -> extended src op, a1 -> extended dst op
# Out: fp0 = result, fp1 = EXOP if an enabled exception occurred
# Dispatches non-NORM/DENORM operands through tbl_fsgldiv_op below.
#
10741: global fsgldiv
10742: fsgldiv:
10743: mov.l %d0,L_SCR3(%a6) # store rnd info
10744:
10745: clr.w %d1
10746: mov.b DTAG(%a6),%d1
10747: lsl.b &0x3,%d1 # dst tag in bits 3-5
10748: or.b STAG(%a6),%d1 # combine src tags
10749:
10750: bne.w fsgldiv_not_norm # optimize on non-norm input
10751:
10752: #
10753: # DIVIDE: NORMs and DENORMs ONLY!
10754: #
# Copy both operands to the scratch frame, then scale both exponents to
# zero so the hardware divide itself cannot trap; the combined scale
# factor (kept in d0) is folded back into the result exponent afterwards.
10755: fsgldiv_norm:
10756: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10757: mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10758: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10759:
10760: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10761: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10762: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10763:
10764: bsr.l scale_to_zero_src # calculate scale factor 1
10765: mov.l %d0,-(%sp) # save scale factor 1
10766:
10767: bsr.l scale_to_zero_dst # calculate scale factor 2
10768:
10769: neg.l (%sp) # S.F. = scale1 - scale2
10770: add.l %d0,(%sp)
10771:
10772: mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
10773: lsr.b &0x6,%d1
10774: mov.l (%sp)+,%d0 # d0 = total scale factor
10775: cmpi.l %d0,&0x3fff-0x7ffe # will result exponent overflow?
10776: ble.w fsgldiv_may_ovfl # maybe; go check
10777:
10778: cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
10779: beq.w fsgldiv_may_unfl # maybe
10780: bgt.w fsgldiv_unfl # yes; go handle underflow
10781:
# Result exponent is safely in range: do the divide with the user's
# rounding control and re-bias the exponent on the way out.
10782: fsgldiv_normal:
10783: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10784:
10785: fmov.l L_SCR3(%a6),%fpcr # set FPCR from saved rnd info
10786: fmov.l &0x0,%fpsr # clear FPSR
10787:
10788: fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
10789:
10790: fmov.l %fpsr,%d1 # save FPSR
10791: fmov.l &0x0,%fpcr # clear FPCR
10792:
10793: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10794:
# Common exit: undo the pre-scaling by subtracting the scale factor (d0)
# from the result exponent, preserving the result's sign bit.
10795: fsgldiv_normal_exit:
10796: fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
10797: mov.l %d2,-(%sp) # save d2
10798: mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10799: mov.l %d1,%d2 # make a copy
10800: andi.l &0x7fff,%d1 # strip sign
10801: andi.w &0x8000,%d2 # keep old sign
10802: sub.l %d0,%d1 # add scale factor
10803: or.w %d2,%d1 # concat old sign,new exp
10804: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10805: mov.l (%sp)+,%d2 # restore d2
10806: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10807: rts
10808:
#
# The scaled divide might overflow: do the divide, then re-bias the result
# exponent and check it against 0x7fff to see whether overflow really occurred.
#
10809: fsgldiv_may_ovfl:
10810: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10811:
10812: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10813: fmov.l &0x0,%fpsr # set FPSR
10814:
10815: fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
10816:
10817: fmov.l %fpsr,%d1
10818: fmov.l &0x0,%fpcr
10819:
10820: or.l %d1,USER_FPSR(%a6) # save INEX,N
10821:
10822: fmovm.x &0x01,-(%sp) # save result to stack
10823: mov.w (%sp),%d1 # fetch new exponent
10824: add.l &0xc,%sp # clear result
10825: andi.l &0x7fff,%d1 # strip sign
10826: sub.l %d0,%d1 # add scale factor
10827: cmp.l %d1,&0x7fff # did divide overflow?
10828: blt.b fsgldiv_normal_exit # no; take normal exit
10829:
# Overflow occurred: record OVFL/AOVFL/AINEX, then produce the default
# result; if OVFL or INEX is enabled, also build the EXOP in fp1.
10830: fsgldiv_ovfl_tst:
10831: or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10832:
10833: mov.b FPCR_ENABLE(%a6),%d1
10834: andi.b &0x13,%d1 # is OVFL or INEX enabled?
10835: bne.b fsgldiv_ovfl_ena # yes
10836:
# Exceptions disabled: return the default overflow result from ovf_res.
10837: fsgldiv_ovfl_dis:
10838: btst &neg_bit,FPSR_CC(%a6) # is result negative
10839: sne %d1 # set sign param accordingly
10840: mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10841: andi.b &0x30,%d0 # kill precision
10842: bsr.l ovf_res # calculate default result
10843: or.b %d0,FPSR_CC(%a6) # set INF if applicable
10844: fmovm.x (%a0),&0x80 # return default result in fp0
10845: rts
10846:
# Exceptions enabled: bias the result exponent down by 0x6000 to form the
# EXOP (exceptional operand) and return it in fp1, then fall into the
# disabled path for the default fp0 result.
10847: fsgldiv_ovfl_ena:
10848: fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10849:
10850: mov.l %d2,-(%sp) # save d2
10851: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10852: mov.l %d1,%d2 # make a copy
10853: andi.l &0x7fff,%d1 # strip sign
10854: andi.w &0x8000,%d2 # keep old sign
10855: sub.l %d0,%d1 # add scale factor
10856: subi.l &0x6000,%d1 # subtract new bias
10857: andi.w &0x7fff,%d1 # clear ms bit
10858: or.w %d2,%d1 # concat old sign,new exp
10859: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10860: mov.l (%sp)+,%d2 # restore d2
10861: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10862: bra.b fsgldiv_ovfl_dis
10863:
#
# The divide underflows: redo it with round-to-zero so the trailing bits
# are preserved for unf_res4, then build the default (and, if enabled,
# the EXOP) result.
#
10864: fsgldiv_unfl:
10865: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10866:
10867: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10868:
10869: fmov.l &rz_mode*0x10,%fpcr # set FPCR: force RZ
10870: fmov.l &0x0,%fpsr # clear FPSR
10871:
10872: fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10873:
10874: fmov.l %fpsr,%d1 # save status
10875: fmov.l &0x0,%fpcr # clear FPCR
10876:
10877: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10878:
10879: mov.b FPCR_ENABLE(%a6),%d1
10880: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10881: bne.b fsgldiv_unfl_ena # yes
10882:
# Exceptions disabled: let unf_res4 (sgl-divide flavor of unf_res)
# denormalize/round the stored result into the default answer.
10883: fsgldiv_unfl_dis:
10884: fmovm.x &0x80,FP_SCR0(%a6) # store out result
10885:
10886: lea FP_SCR0(%a6),%a0 # pass: result addr
10887: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10888: bsr.l unf_res4 # calculate default result
10889: or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10890: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10891: rts
10892:
10893: #
10894: # UNFL is enabled.
10895: #
# Redo the divide with the user's rounding control in fp1, then bias the
# exponent up by 0x6000 to form the EXOP returned in fp1.
10896: fsgldiv_unfl_ena:
10897: fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10898:
10899: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10900: fmov.l &0x0,%fpsr # clear FPSR
10901:
10902: fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10903:
10904: fmov.l &0x0,%fpcr # clear FPCR
10905:
10906: fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10907: mov.l %d2,-(%sp) # save d2
10908: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10909: mov.l %d1,%d2 # make a copy
10910: andi.l &0x7fff,%d1 # strip sign
10911: andi.w &0x8000,%d2 # keep old sign
10912: sub.l %d0,%d1 # add scale factor
10913: addi.l &0x6000,%d1 # add bias
10914: andi.w &0x7fff,%d1 # clear top bit
10915: or.w %d2,%d1 # concat old sign, new exp
10916: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10917: mov.l (%sp)+,%d2 # restore d2
10918: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10919: bra.b fsgldiv_unfl_dis
10920:
10921:
10922: #
10923: # the divide operation MAY underflow:
10924: #
# Do the divide, then compare |result| against 1.0: greater means no
# underflow, smaller means underflow. Exactly ~1.0 is ambiguous and is
# resolved by redoing the divide in round-to-zero below.
10925: fsgldiv_may_unfl:
10926: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10927:
10928: fmov.l L_SCR3(%a6),%fpcr # set FPCR
10929: fmov.l &0x0,%fpsr # clear FPSR
10930:
10931: fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10932:
10933: fmov.l %fpsr,%d1 # save status
10934: fmov.l &0x0,%fpcr # clear FPCR
10935:
10936: or.l %d1,USER_FPSR(%a6) # save INEX2,N
10937:
10938: fabs.x %fp0,%fp1 # make a copy of result
10939: fcmp.b %fp1,&0x1 # is |result| > 1.b?
10940: fbgt.w fsgldiv_normal_exit # no; no underflow occurred
10941: fblt.w fsgldiv_unfl # yes; underflow occurred
10942:
10943: #
10944: # we still don't know if underflow occurred. result is ~ equal to 1. but,
10945: # we don't know if the result was an underflow that rounded up to a 1
10946: # or a normalized number that rounded down to a 1. so, redo the entire
10947: # operation using RZ as the rounding mode to see what the pre-rounded
10948: # result is. this case should be relatively rare.
10949: #
10950: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
10951:
10952: clr.l %d1 # clear scratch register
10953: ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
10954:
10955: fmov.l %d1,%fpcr # set FPCR
10956: fmov.l &0x0,%fpsr # clear FPSR
10957:
10958: fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10959:
10960: fmov.l &0x0,%fpcr # clear FPCR
10961: fabs.x %fp1 # make absolute value
10962: fcmp.b %fp1,&0x1 # is |result| < 1.b?
10963: fbge.w fsgldiv_normal_exit # no; no underflow occurred
10964: bra.w fsgldiv_unfl # yes; underflow occurred
10964:
10965: ############################################################################
10966:
10967: #
10968: # Divide: inputs are not both normalized; what are they?
10969: #
# d1 = (DTAG << 3) | STAG, so the table row is the dst (dividend) tag and
# the column is the src (divisor) tag; comments read "dst / src".
10970: fsgldiv_not_norm:
10971: mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10972: jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
10973:
10974: swbeg &48
10975: tbl_fsgldiv_op:
10976: short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
10977: short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
10978: short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
10979: short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
10980: short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
10981: short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
10982: short tbl_fsgldiv_op - tbl_fsgldiv_op #
10983: short tbl_fsgldiv_op - tbl_fsgldiv_op #
10984:
10985: short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
10986: short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
10987: short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
10988: short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
10989: short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
10990: short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
10991: short tbl_fsgldiv_op - tbl_fsgldiv_op #
10992: short tbl_fsgldiv_op - tbl_fsgldiv_op #
10993:
10994: short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
10995: short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
10996: short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
10997: short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
10998: short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
10999: short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
11000: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11001: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11002:
11003: short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
11004: short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
11005: short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
11006: short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
11007: short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
11008: short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
11009: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11010: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11011:
11012: short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
11013: short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
11014: short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
11015: short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
11016: short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
11017: short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
11018: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11019: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11020:
11021: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
11022: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
11023: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
11024: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
11025: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
11026: short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
11027: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11028: short tbl_fsgldiv_op - tbl_fsgldiv_op #
11029:
# Trampolines: special-case handling is shared with the fdiv emulation
# and the common res_* result builders.
11030: fsgldiv_res_qnan:
11031: bra.l res_qnan
11032: fsgldiv_res_snan:
11033: bra.l res_snan
11034: fsgldiv_res_operr:
11035: bra.l res_operr
11036: fsgldiv_inf_load:
11037: bra.l fdiv_inf_load
11038: fsgldiv_zero_load:
11039: bra.l fdiv_zero_load
11040: fsgldiv_inf_dst:
11041: bra.l fdiv_inf_dst
11042:
11043: #########################################################################
11044: # XDEF **************************************************************** #
11045: # fadd(): emulates the fadd instruction #
11046: #	fsadd(): emulates the fsadd instruction			#
11047: # fdadd(): emulates the fdadd instruction #
11048: # #
11049: # XREF **************************************************************** #
11050: # addsub_scaler2() - scale the operands so they won't take exc #
11051: # ovf_res() - return default overflow result #
11052: # unf_res() - return default underflow result #
11053: # res_qnan() - set QNAN result #
11054: # res_snan() - set SNAN result #
11055: # res_operr() - set OPERR result #
11056: # scale_to_zero_src() - set src operand exponent equal to zero #
11057: # scale_to_zero_dst() - set dst operand exponent equal to zero #
11058: # #
11059: # INPUT *************************************************************** #
11060: # a0 = pointer to extended precision source operand #
11061: # a1 = pointer to extended precision destination operand #
11062: # #
11063: # OUTPUT ************************************************************** #
11064: # fp0 = result #
11065: # fp1 = EXOP (if exception occurred) #
11066: # #
11067: # ALGORITHM *********************************************************** #
11068: # Handle NANs, infinities, and zeroes as special cases. Divide #
11069: # norms into extended, single, and double precision. #
11070: # Do addition after scaling exponents such that exception won't #
11071: # occur. Then, check result exponent to see if exception would have #
11072: # occurred. If so, return default result and maybe EXOP. Else, insert #
11073: # the correct result exponent and return. Set FPSR bits as appropriate. #
11074: # #
11075: #########################################################################
11076:
# fsadd: single-precision add — force sgl rounding precision in d0,
# then share the common fadd path.
11077: global fsadd
11078: fsadd:
11079: andi.b &0x30,%d0 # clear rnd prec
11080: ori.b &s_mode*0x10,%d0 # insert sgl prec
11081: bra.b fadd
11082:
# fdadd: double-precision add — force dbl rounding precision in d0 and
# fall straight through into fadd below.
11083: global fdadd
11084: fdadd:
11085: andi.b &0x30,%d0 # clear rnd prec
11086: ori.b &d_mode*0x10,%d0 # insert dbl prec
11087:
#
# fadd: emulate the FADD instruction.
# In:  d0 = rounding precision/mode, a0 -> extended src op, a1 -> extended dst op
# Out: fp0 = result, fp1 = EXOP if an enabled exception occurred
# Non-NORM/DENORM operands dispatch through tbl_fadd_op.
#
11088: global fadd
11089: fadd:
11090: mov.l %d0,L_SCR3(%a6) # store rnd info
11091:
11092: clr.w %d1
11093: mov.b DTAG(%a6),%d1
11094: lsl.b &0x3,%d1 # dst tag in bits 3-5
11095: or.b STAG(%a6),%d1 # combine src tags
11096:
11097: bne.w fadd_not_norm # optimize on non-norm input
11098:
11099: #
11100: # ADD: norms and denorms
11101: #
# addsub_scaler2 scales both operands so the add itself cannot trap;
# d0 returns the scale factor to fold back into the result exponent.
11102: fadd_norm:
11103: bsr.l addsub_scaler2 # scale exponents
11104:
11105: fadd_zero_entry:
11106: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11107:
11108: fmov.l &0x0,%fpsr # clear FPSR
11109: fmov.l L_SCR3(%a6),%fpcr # set FPCR
11110:
11111: fadd.x FP_SCR0(%a6),%fp0 # execute add
11112:
11113: fmov.l &0x0,%fpcr # clear FPCR
11114: fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11115:
11116: or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11117:
11118: fbeq.w fadd_zero_exit # if result is zero, end now
11119:
11120: mov.l %d2,-(%sp) # save d2
11121:
11122: fmovm.x &0x01,-(%sp) # save result to stack
11123:
11124: mov.w 2+L_SCR3(%a6),%d1 # fetch rnd prec
11125: lsr.b &0x6,%d1 # d1 = prec index for tables below
11126:
11127: mov.w (%sp),%d2 # fetch new sign, exp
11128: andi.l &0x7fff,%d2 # strip sign
11129: sub.l %d0,%d2 # add scale factor
11130:
11131: cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11132: bge.b fadd_ovfl # yes
11133:
11134: cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11135: blt.w fadd_unfl # yes
11136: beq.w fadd_may_unfl # maybe; go find out
11137:
# In-range result: splice the re-biased exponent back into the stacked
# result and return it in fp0.
11138: fadd_normal:
11139: mov.w (%sp),%d1
11140: andi.w &0x8000,%d1 # keep sign
11141: or.w %d2,%d1 # concat sign,new exp
11142: mov.w %d1,(%sp) # insert new exponent
11143:
11144: fmovm.x (%sp)+,&0x80 # return result in fp0
11145:
11146: mov.l (%sp)+,%d2 # restore d2
11147: rts
11148:
11149: fadd_zero_exit:
11150: # fmov.s &0x00000000,%fp0 # return zero in fp0
11151: rts
11152:
# Per-precision overflow/underflow exponent thresholds, indexed by d1.
11153: tbl_fadd_ovfl:
11154: long 0x7fff # ext ovfl
11155: long 0x407f # sgl ovfl
11156: long 0x43ff # dbl ovfl
11157:
11158: tbl_fadd_unfl:
11159: long 0x0000 # ext unfl
11160: long 0x3f81 # sgl unfl
11161: long 0x3c01 # dbl unfl
11162:
#
# The add overflowed: set the exception bits, return the default overflow
# result in fp0, and build the EXOP in fp1 if OVFL or INEX is enabled.
# On entry the (scaled) result still sits on the stack; d2 is saved below it.
#
11163: fadd_ovfl:
11164: or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11165:
11166: mov.b FPCR_ENABLE(%a6),%d1
11167: andi.b &0x13,%d1 # is OVFL or INEX enabled?
11168: bne.b fadd_ovfl_ena # yes
11169:
11170: add.l &0xc,%sp # discard stacked result
11171: fadd_ovfl_dis:
11172: btst &neg_bit,FPSR_CC(%a6) # is result negative?
11173: sne %d1 # set sign param accordingly
11174: mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11175: bsr.l ovf_res # calculate default result
11176: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11177: fmovm.x (%a0),&0x80 # return default result in fp0
11178: mov.l (%sp)+,%d2 # restore d2
11179: rts
11180:
11181: fadd_ovfl_ena:
11182: mov.b L_SCR3(%a6),%d1
11183: andi.b &0xc0,%d1 # is precision extended?
11184: bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11185:
# Bias the result exponent down by 0x6000 to form the EXOP in fp1.
11186: fadd_ovfl_ena_cont:
11187: mov.w (%sp),%d1
11188: andi.w &0x8000,%d1 # keep sign
11189: subi.l &0x6000,%d2 # subtract extra bias
11190: andi.w &0x7fff,%d2 # clear top bit
11191: or.w %d2,%d1 # concat sign,new exp
11192: mov.w %d1,(%sp) # insert new exponent
11193:
11194: fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11195: bra.b fadd_ovfl_dis
11196:
# sgl/dbl precision: redo the add rounded in extended with the user's
# rounding mode only, so the EXOP carries full precision.
11197: fadd_ovfl_ena_sd:
11198: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11199:
11200: mov.l L_SCR3(%a6),%d1
11201: andi.b &0x30,%d1 # keep rnd mode
11202: fmov.l %d1,%fpcr # set FPCR
11203:
11204: fadd.x FP_SCR0(%a6),%fp0 # execute add
11205:
11206: fmov.l &0x0,%fpcr # clear FPCR
11207:
11208: add.l &0xc,%sp # replace old stacked result
11209: fmovm.x &0x01,-(%sp) # with the re-rounded one
11210: bra.b fadd_ovfl_ena_cont
11211:
#
# The add underflowed: redo it with round-to-zero so unf_res can compute
# the default denormalized result; build the EXOP in fp1 if enabled.
#
11212: fadd_unfl:
11213: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11214:
11215: add.l &0xc,%sp # discard stacked result
11216:
11217: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11218:
11219: fmov.l &rz_mode*0x10,%fpcr # set FPCR: force RZ
11220: fmov.l &0x0,%fpsr # clear FPSR
11221:
11222: fadd.x FP_SCR0(%a6),%fp0 # execute add
11223:
11224: fmov.l &0x0,%fpcr # clear FPCR
11225: fmov.l %fpsr,%d1 # save status
11226:
11227: or.l %d1,USER_FPSR(%a6) # save INEX,N
11228:
11229: mov.b FPCR_ENABLE(%a6),%d1
11230: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11231: bne.b fadd_unfl_ena # yes
11232:
11233: fadd_unfl_dis:
11234: fmovm.x &0x80,FP_SCR0(%a6) # store out result
11235:
11236: lea FP_SCR0(%a6),%a0 # pass: result addr
11237: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11238: bsr.l unf_res # calculate default result
11239: or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11240: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11241: mov.l (%sp)+,%d2 # restore d2
11242: rts
11243:
# UNFL/INEX enabled: redo the add in fp1 with the user's rounding control,
# then bias the exponent up by 0x6000 to form the EXOP in fp1.
11244: fadd_unfl_ena:
11245: fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11246:
11247: mov.l L_SCR3(%a6),%d1
11248: andi.b &0xc0,%d1 # is precision extended?
11249: bne.b fadd_unfl_ena_sd # no; sgl or dbl
11250:
11251: fmov.l L_SCR3(%a6),%fpcr # set FPCR
11252:
11253: fadd_unfl_ena_cont:
11254: fmov.l &0x0,%fpsr # clear FPSR
11255:
11256: fadd.x FP_SCR0(%a6),%fp1 # execute add
11257:
11258: fmov.l &0x0,%fpcr # clear FPCR
11259:
11260: fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11261: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11262: mov.l %d1,%d2 # make a copy
11263: andi.l &0x7fff,%d1 # strip sign
11264: andi.w &0x8000,%d2 # keep old sign
11265: sub.l %d0,%d1 # add scale factor
11266: addi.l &0x6000,%d1 # add new bias
11267: andi.w &0x7fff,%d1 # clear top bit
11268: or.w %d2,%d1 # concat sign,new exp
11269: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11270: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11271: bra.w fadd_unfl_dis
11272:
# sgl/dbl precision: use rounding mode only (extended precision) for
# the EXOP computation.
11273: fadd_unfl_ena_sd:
11274: mov.l L_SCR3(%a6),%d1
11275: andi.b &0x30,%d1 # use only rnd mode
11276: fmov.l %d1,%fpcr # set FPCR
11277:
11278: bra.b fadd_unfl_ena_cont
11279:
11280: #
11281: # result is equal to the smallest normalized number in the selected precision
11282: # if the precision is extended, this result could not have come from an
11283: # underflow that rounded up.
11284: #
11285: fadd_may_unfl:
11286: mov.l L_SCR3(%a6),%d1
11287: andi.b &0xc0,%d1 # extended precision?
11288: beq.w fadd_normal # yes; no underflow occurred
11289:
# Only a mantissa of exactly 0x8000000000000000 with INEX2 set can be an
# underflow that rounded up to the smallest norm; anything else is normal.
11290: mov.l 0x4(%sp),%d1 # extract hi(man)
11291: cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11292: bne.w fadd_normal # no; no underflow occurred
11293:
11294: tst.l 0x8(%sp) # is lo(man) = 0x0?
11295: bne.w fadd_normal # no; no underflow occurred
11296:
11297: btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11298: beq.w fadd_normal # no; no underflow occurred
11299:
11300: #
11301: # ok, so now the result has a exponent equal to the smallest normalized
11302: # exponent for the selected precision. also, the mantissa is equal to
11303: # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11304: # g,r,s.
11305: # now, we must determine whether the pre-rounded result was an underflow
11306: # rounded "up" or a normalized number rounded "down".
11307: # so, we do this be re-executing the add using RZ as the rounding mode and
11308: # seeing if the new result is smaller or equal to the current result.
11309: #
11310: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11311:
11312: mov.l L_SCR3(%a6),%d1
11313: andi.b &0xc0,%d1 # keep rnd prec
11314: ori.b &rz_mode*0x10,%d1 # insert rnd mode
11315: fmov.l %d1,%fpcr # set FPCR
11316: fmov.l &0x0,%fpsr # clear FPSR
11317:
11318: fadd.x FP_SCR0(%a6),%fp1 # execute add
11319:
11320: fmov.l &0x0,%fpcr # clear FPCR
11321:
11322: fabs.x %fp0 # compare absolute values
11323: fabs.x %fp1
11324: fcmp.x %fp0,%fp1 # is first result > second?
11325:
11326: fbgt.w fadd_unfl # yes; it's an underflow
11327: bra.w fadd_normal # no; it's not an underflow
11328:
11329: ##########################################################################
11330:
11331: #
11332: # Add: inputs are not both normalized; what are they?
11333: #
# d1 = (DTAG << 3) | STAG: table row is the dst tag, column the src tag.
11334: fadd_not_norm:
11335: mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11336: jmp (tbl_fadd_op.b,%pc,%d1.w*1)
11337:
11338: swbeg &48
11339: tbl_fadd_op:
11340: short fadd_norm - tbl_fadd_op # NORM + NORM
11341: short fadd_zero_src - tbl_fadd_op # NORM + ZERO
11342: short fadd_inf_src - tbl_fadd_op # NORM + INF
11343: short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
11344: short fadd_norm - tbl_fadd_op # NORM + DENORM
11345: short fadd_res_snan - tbl_fadd_op # NORM + SNAN
11346: short tbl_fadd_op - tbl_fadd_op #
11347: short tbl_fadd_op - tbl_fadd_op #
11348:
11349: short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
11350: short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
11351: short fadd_inf_src - tbl_fadd_op # ZERO + INF
11352: short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
11353: short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
11354: short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
11355: short tbl_fadd_op - tbl_fadd_op #
11356: short tbl_fadd_op - tbl_fadd_op #
11357:
11358: short fadd_inf_dst - tbl_fadd_op # INF + NORM
11359: short fadd_inf_dst - tbl_fadd_op # INF + ZERO
11360: short fadd_inf_2 - tbl_fadd_op # INF + INF
11361: short fadd_res_qnan - tbl_fadd_op # INF + QNAN
11362: short fadd_inf_dst - tbl_fadd_op # INF + DENORM
11363: short fadd_res_snan - tbl_fadd_op # INF + SNAN
11364: short tbl_fadd_op - tbl_fadd_op #
11365: short tbl_fadd_op - tbl_fadd_op #
11366:
11367: short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
11368: short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
11369: short fadd_res_qnan - tbl_fadd_op # QNAN + INF
11370: short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
11371: short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
11372: short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
11373: short tbl_fadd_op - tbl_fadd_op #
11374: short tbl_fadd_op - tbl_fadd_op #
11375:
11376: short fadd_norm - tbl_fadd_op # DENORM + NORM
11377: short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
11378: short fadd_inf_src - tbl_fadd_op # DENORM + INF
11379: short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
11380: short fadd_norm - tbl_fadd_op # DENORM + DENORM
11381: short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
11382: short tbl_fadd_op - tbl_fadd_op #
11383: short tbl_fadd_op - tbl_fadd_op #
11384:
11385: short fadd_res_snan - tbl_fadd_op # SNAN + NORM
11386: short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
11387: short fadd_res_snan - tbl_fadd_op # SNAN + INF
11388: short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
11389: short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
11390: short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
11391: short tbl_fadd_op - tbl_fadd_op #
11392: short tbl_fadd_op - tbl_fadd_op #
11393:
# NAN operands go to the common result builders.
11394: fadd_res_qnan:
11395: bra.l res_qnan
11396: fadd_res_snan:
11397: bra.l res_snan
11398:
11399: #
11400: # both operands are ZEROes
11401: #
11402: fadd_zero_2:
11403: mov.b SRC_EX(%a0),%d0 # are the signs opposite
11404: mov.b DST_EX(%a1),%d1
11405: eor.b %d0,%d1
11406: bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11407:
11408: # the signs are the same. so determine whether they are positive or negative
11409: # and return the appropriately signed zero.
11410: tst.b %d0 # are ZEROes positive or negative?
11411: bmi.b fadd_zero_rm # negative
11412: fmov.s &0x00000000,%fp0 # return +ZERO
11413: mov.b &z_bmask,FPSR_CC(%a6) # set Z
11414: rts
11415:
11416: #
11417: # the ZEROes have opposite signs:
11418: # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11419: # - -ZERO is returned in the case of RM.
11420: #
11421: fadd_zero_2_chk_rm:
11422: mov.b 3+L_SCR3(%a6),%d1
11423: andi.b &0x30,%d1 # extract rnd mode
11424: cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11425: beq.b fadd_zero_rm # yes
11426: fmov.s &0x00000000,%fp0 # return +ZERO
11427: mov.b &z_bmask,FPSR_CC(%a6) # set Z
11428: rts
11429:
11430: fadd_zero_rm:
11431: fmov.s &0x80000000,%fp0 # return -ZERO
11432: mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11433: rts
11434:
11435: #
11436: # one operand is a ZERO and the other is a DENORM or NORM. scale
11437: # the DENORM or NORM and jump to the regular fadd routine.
11438: #
# fadd_zero_dst: dst is ZERO — scale the non-zero src into FP_SCR0 and
# zero out FP_SCR1, then rejoin the common add path.
11439: fadd_zero_dst:
11440: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11441: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11442: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11443: bsr.l scale_to_zero_src # scale the operand
11444: clr.w FP_SCR1_EX(%a6)
11445: clr.l FP_SCR1_HI(%a6)
11446: clr.l FP_SCR1_LO(%a6)
11447: bra.w fadd_zero_entry # go execute fadd
11448:
# fadd_zero_src: src is ZERO — mirror image of the case above.
11449: fadd_zero_src:
11450: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11451: mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11452: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11453: bsr.l scale_to_zero_dst # scale the operand
11454: clr.w FP_SCR0_EX(%a6)
11455: clr.l FP_SCR0_HI(%a6)
11456: clr.l FP_SCR0_LO(%a6)
11457: bra.w fadd_zero_entry # go execute fadd
11458:
11459: #
11460: # both operands are INFs. an OPERR will result if the INFs have
11461: # different signs. else, an INF of the same sign is returned
11462: #
11463: fadd_inf_2:
11464: mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11465: mov.b DST_EX(%a1),%d1
11466: eor.b %d1,%d0
11467: bmi.l res_operr # weed out (-INF)+(+INF)
11468:
11469: # ok, so it's not an OPERR. but, we do have to remember to return the
11470: # src INF since that's where the 881/882 gets the j-bit from...
# (same-sign INF+INF falls through into fadd_inf_src below)
11471:
11472: #
11473: # operands are INF (src) and one of {ZERO, INF, DENORM, NORM}
11474: #
11475: fadd_inf_src:
11476: fmovm.x SRC(%a0),&0x80 # return src INF
11477: tst.b SRC_EX(%a0) # is INF positive?
11478: bpl.b fadd_inf_done # yes; we're done
11479: mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11480: rts
11481:
11482: #
11483: # operands are INF (dst) and one of {ZERO, INF, DENORM, NORM}
11484: #
11485: fadd_inf_dst:
11486: fmovm.x DST(%a1),&0x80 # return dst INF
11487: tst.b DST_EX(%a1) # is INF positive?
11488: bpl.b fadd_inf_done # yes; we're done
11489: mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11490: rts
11491:
11492: fadd_inf_done:
11493: mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11494: rts
11495:
11496: #########################################################################
11497: # XDEF **************************************************************** #
11498: # fsub(): emulates the fsub instruction #
11499: # fssub(): emulates the fssub instruction #
11500: # fdsub(): emulates the fdsub instruction #
11501: # #
11502: # XREF **************************************************************** #
11503: # addsub_scaler2() - scale the operands so they won't take exc #
11504: # ovf_res() - return default overflow result #
11505: # unf_res() - return default underflow result #
11506: # res_qnan() - set QNAN result #
11507: # res_snan() - set SNAN result #
11508: # res_operr() - set OPERR result #
11509: # scale_to_zero_src() - set src operand exponent equal to zero #
11510: # scale_to_zero_dst() - set dst operand exponent equal to zero #
11511: # #
11512: # INPUT *************************************************************** #
11513: # a0 = pointer to extended precision source operand #
11514: # a1 = pointer to extended precision destination operand #
11515: # #
11516: # OUTPUT ************************************************************** #
11517: # fp0 = result #
11518: # fp1 = EXOP (if exception occurred) #
11519: # #
11520: # ALGORITHM *********************************************************** #
11521: # Handle NANs, infinities, and zeroes as special cases. Divide #
11522: # norms into extended, single, and double precision. #
11523: # Do subtraction after scaling exponents such that exception won't#
11524: # occur. Then, check result exponent to see if exception would have #
11525: # occurred. If so, return default result and maybe EXOP. Else, insert #
11526: # the correct result exponent and return. Set FPSR bits as appropriate. #
11527: # #
11528: #########################################################################
11529:
# fssub: single-precision subtract — force sgl rounding precision in d0,
# then share the common fsub path.
11530: global fssub
11531: fssub:
11532: andi.b &0x30,%d0 # clear rnd prec
11533: ori.b &s_mode*0x10,%d0 # insert sgl prec
11534: bra.b fsub
11535:
# fdsub: double-precision subtract — force dbl rounding precision in d0
# and fall straight through into fsub below.
11536: global fdsub
11537: fdsub:
11538: andi.b &0x30,%d0 # clear rnd prec
11539: ori.b &d_mode*0x10,%d0 # insert dbl prec
11540:
11541: global fsub
11542: fsub:
11543: mov.l %d0,L_SCR3(%a6) # store rnd info
11544:
11545: clr.w %d1
11546: mov.b DTAG(%a6),%d1
11547: lsl.b &0x3,%d1
11548: or.b STAG(%a6),%d1 # combine src tags
11549:
11550: bne.w fsub_not_norm # optimize on non-norm input
11551:
11552: #
11553: # SUB: norms and denorms
11554: #
11555: fsub_norm:
11556: bsr.l addsub_scaler2 # scale exponents
11557:
11558: fsub_zero_entry:
11559: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11560:
11561: fmov.l &0x0,%fpsr # clear FPSR
11562: fmov.l L_SCR3(%a6),%fpcr # set FPCR
11563:
11564: fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11565:
11566: fmov.l &0x0,%fpcr # clear FPCR
11567: fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11568:
11569: or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11570:
11571: fbeq.w fsub_zero_exit # if result zero, end now
11572:
11573: mov.l %d2,-(%sp) # save d2
11574:
11575: fmovm.x &0x01,-(%sp) # save result to stack
11576:
11577: mov.w 2+L_SCR3(%a6),%d1
11578: lsr.b &0x6,%d1
11579:
11580: mov.w (%sp),%d2 # fetch new exponent
11581: andi.l &0x7fff,%d2 # strip sign
11582: sub.l %d0,%d2 # add scale factor
11583:
11584: cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11585: bge.b fsub_ovfl # yes
11586:
11587: cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11588: blt.w fsub_unfl # yes
11589: beq.w fsub_may_unfl # maybe; go find out
11590:
11591: fsub_normal:
11592: mov.w (%sp),%d1
11593: andi.w &0x8000,%d1 # keep sign
11594: or.w %d2,%d1 # insert new exponent
11595: mov.w %d1,(%sp) # insert new exponent
11596:
11597: fmovm.x (%sp)+,&0x80 # return result in fp0
11598:
11599: mov.l (%sp)+,%d2 # restore d2
11600: rts
11601:
11602: fsub_zero_exit:
11603: # fmov.s &0x00000000,%fp0 # return zero in fp0
11604: rts
11605:
11606: tbl_fsub_ovfl:
11607: long 0x7fff # ext ovfl
11608: long 0x407f # sgl ovfl
11609: long 0x43ff # dbl ovfl
11610:
11611: tbl_fsub_unfl:
11612: long 0x0000 # ext unfl
11613: long 0x3f81 # sgl unfl
11614: long 0x3c01 # dbl unfl
11615:
11616: fsub_ovfl:
11617: or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11618:
11619: mov.b FPCR_ENABLE(%a6),%d1
11620: andi.b &0x13,%d1 # is OVFL or INEX enabled?
11621: bne.b fsub_ovfl_ena # yes
11622:
11623: add.l &0xc,%sp
11624: fsub_ovfl_dis:
11625: btst &neg_bit,FPSR_CC(%a6) # is result negative?
11626: sne %d1 # set sign param accordingly
11627: mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11628: bsr.l ovf_res # calculate default result
11629: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11630: fmovm.x (%a0),&0x80 # return default result in fp0
11631: mov.l (%sp)+,%d2 # restore d2
11632: rts
11633:
11634: fsub_ovfl_ena:
11635: mov.b L_SCR3(%a6),%d1
11636: andi.b &0xc0,%d1 # is precision extended?
11637: bne.b fsub_ovfl_ena_sd # no
11638:
11639: fsub_ovfl_ena_cont:
11640: mov.w (%sp),%d1 # fetch {sgn,exp}
11641: andi.w &0x8000,%d1 # keep sign
11642: subi.l &0x6000,%d2 # subtract new bias
11643: andi.w &0x7fff,%d2 # clear top bit
11644: or.w %d2,%d1 # concat sign,exp
11645: mov.w %d1,(%sp) # insert new exponent
11646:
11647: fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11648: bra.b fsub_ovfl_dis
11649:
11650: fsub_ovfl_ena_sd:
11651: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11652:
11653: mov.l L_SCR3(%a6),%d1
11654: andi.b &0x30,%d1 # clear rnd prec
11655: fmov.l %d1,%fpcr # set FPCR
11656:
11657: fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11658:
11659: fmov.l &0x0,%fpcr # clear FPCR
11660:
11661: add.l &0xc,%sp
11662: fmovm.x &0x01,-(%sp)
11663: bra.b fsub_ovfl_ena_cont
11664:
11665: fsub_unfl:
11666: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11667:
11668: add.l &0xc,%sp # remove old result from stack
11669:
11670: fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11671:
11672: fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ, ext prec
11673: fmov.l &0x0,%fpsr # clear FPSR
11674:
11675: fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11676:
11677: fmov.l &0x0,%fpcr # clear FPCR
11678: fmov.l %fpsr,%d1 # save status
11679:
11680: or.l %d1,USER_FPSR(%a6)
11681:
11682: mov.b FPCR_ENABLE(%a6),%d1
11683: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11684: bne.b fsub_unfl_ena # yes
11685:
11686: fsub_unfl_dis:
11687: fmovm.x &0x80,FP_SCR0(%a6) # store out result
11688:
11689: lea FP_SCR0(%a6),%a0 # pass: result addr
11690: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11691: bsr.l unf_res # calculate default result
11692: or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11693: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11694: mov.l (%sp)+,%d2 # restore d2
11695: rts
11696:
11697: fsub_unfl_ena:
11698: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11699:
11700: mov.l L_SCR3(%a6),%d1
11701: andi.b &0xc0,%d1 # is precision extended?
11702: bne.b fsub_unfl_ena_sd # no; prec = sgl or dbl
11703:
11704: fmov.l L_SCR3(%a6),%fpcr # set FPCR
11705:
11706: fsub_unfl_ena_cont:
11707: fmov.l &0x0,%fpsr # clear FPSR
11708:
11709: fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11710:
11711: fmov.l &0x0,%fpcr # clear FPCR
11712:
11713: fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
11714: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11715: mov.l %d1,%d2 # make a copy
11716: andi.l &0x7fff,%d1 # strip sign
11717: andi.w &0x8000,%d2 # keep old sign
11718: sub.l %d0,%d1 # subtract scale factor
11719: addi.l &0x6000,%d1 # add new bias
11720: andi.w &0x7fff,%d1 # clear top bit
11721: or.w %d2,%d1 # concat sgn,exp
11722: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11723: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11724: bra.w fsub_unfl_dis
11725:
11726: fsub_unfl_ena_sd:
11727: mov.l L_SCR3(%a6),%d1
11728: andi.b &0x30,%d1 # clear rnd prec (use extended)
11729: fmov.l %d1,%fpcr # set FPCR
11730:
11731: bra.b fsub_unfl_ena_cont
11732:
11733: #
11734: # result is equal to the smallest normalized number in the selected precision
11735: # if the precision is extended, this result could not have come from an
11736: # underflow that rounded up.
11737: #
11738: fsub_may_unfl:
11739: mov.l L_SCR3(%a6),%d1
11740: andi.b &0xc0,%d1 # fetch rnd prec
11741: beq.w fsub_normal # extended; no underflow occurred
11742:
11743: mov.l 0x4(%sp),%d1
11744: cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11745: bne.w fsub_normal # no; no underflow occurred
11746:
11747: tst.l 0x8(%sp) # is lo(man) = 0x0?
11748: bne.w fsub_normal # no; no underflow occurred
11749:
11750: btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11751: beq.w fsub_normal # no; no underflow occurred
11752:
11753: #
11754: # ok, so now the result has an exponent equal to the smallest normalized
11755: # exponent for the selected precision. also, the mantissa is equal to
11756: # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11757: # g,r,s.
11758: # now, we must determine whether the pre-rounded result was an underflow
11759: # rounded "up" or a normalized number rounded "down".
11760: # so, we do this by re-executing the subtract using RZ as the rounding mode
11761: # and seeing if the new result is smaller or equal to the current result.
11762: #
11763: fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11764:
11765: mov.l L_SCR3(%a6),%d1
11766: andi.b &0xc0,%d1 # keep rnd prec
11767: ori.b &rz_mode*0x10,%d1 # insert rnd mode
11768: fmov.l %d1,%fpcr # set FPCR
11769: fmov.l &0x0,%fpsr # clear FPSR
11770:
11771: fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11772:
11773: fmov.l &0x0,%fpcr # clear FPCR
11774:
11775: fabs.x %fp0 # compare absolute values
11776: fabs.x %fp1
11777: fcmp.x %fp0,%fp1 # is first result > second?
11778:
11779: fbgt.w fsub_unfl # yes; it's an underflow
11780: bra.w fsub_normal # no; it's not an underflow
11781:
11782: ##########################################################################
11783:
11784: #
11785: # Sub: inputs are not both normalized; what are they?
11786: #
11787: fsub_not_norm:
11788: mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # fetch table offset
11789: jmp (tbl_fsub_op.b,%pc,%d1.w*1) # jump to routine
11790:
11791: swbeg &48
11792: tbl_fsub_op:
11793: short fsub_norm - tbl_fsub_op # NORM - NORM
11794: short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11795: short fsub_inf_src - tbl_fsub_op # NORM - INF
11796: short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11797: short fsub_norm - tbl_fsub_op # NORM - DENORM
11798: short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11799: short tbl_fsub_op - tbl_fsub_op #
11800: short tbl_fsub_op - tbl_fsub_op #
11801:
11802: short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11803: short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11804: short fsub_inf_src - tbl_fsub_op # ZERO - INF
11805: short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11806: short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11807: short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11808: short tbl_fsub_op - tbl_fsub_op #
11809: short tbl_fsub_op - tbl_fsub_op #
11810:
11811: short fsub_inf_dst - tbl_fsub_op # INF - NORM
11812: short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11813: short fsub_inf_2 - tbl_fsub_op # INF - INF
11814: short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11815: short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11816: short fsub_res_snan - tbl_fsub_op # INF - SNAN
11817: short tbl_fsub_op - tbl_fsub_op #
11818: short tbl_fsub_op - tbl_fsub_op #
11819:
11820: short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11821: short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11822: short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11823: short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11824: short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11825: short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11826: short tbl_fsub_op - tbl_fsub_op #
11827: short tbl_fsub_op - tbl_fsub_op #
11828:
11829: short fsub_norm - tbl_fsub_op # DENORM - NORM
11830: short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11831: short fsub_inf_src - tbl_fsub_op # DENORM - INF
11832: short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11833: short fsub_norm - tbl_fsub_op # DENORM - DENORM
11834: short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11835: short tbl_fsub_op - tbl_fsub_op #
11836: short tbl_fsub_op - tbl_fsub_op #
11837:
11838: short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11839: short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11840: short fsub_res_snan - tbl_fsub_op # SNAN - INF
11841: short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11842: short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11843: short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11844: short tbl_fsub_op - tbl_fsub_op #
11845: short tbl_fsub_op - tbl_fsub_op #
11846:
11847: fsub_res_qnan:
11848: bra.l res_qnan
11849: fsub_res_snan:
11850: bra.l res_snan
11851:
11852: #
11853: # both operands are ZEROes
11854: #
11855: fsub_zero_2:
11856: mov.b SRC_EX(%a0),%d0 # d0 = src sign byte
11857: mov.b DST_EX(%a1),%d1 # d1 = dst sign byte
11858: eor.b %d1,%d0 # d0 = src sign ^ dst sign
11859: bpl.b fsub_zero_2_chk_rm # signs are the same
11860:
11861: # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
11862: tst.b %d1 # is dst negative? (was tst.b %d0: the eor result, always minus here, so +0-(-0) wrongly gave -0)
11863: bmi.b fsub_zero_2_rm # yes
11864: fmov.s &0x00000000,%fp0 # no; return +ZERO
11865: mov.b &z_bmask,FPSR_CC(%a6) # set Z
11866: rts
11867:
11868: #
11869: # the ZEROes have the same signs:
11870: # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11871: # - -ZERO is returned in the case of RM.
11872: #
11873: fsub_zero_2_chk_rm:
11874: mov.b 3+L_SCR3(%a6),%d1 # fetch rnd mode/prec byte
11875: andi.b &0x30,%d1 # extract rnd mode
11876: cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11877: beq.b fsub_zero_2_rm # yes
11878: fmov.s &0x00000000,%fp0 # no; return +ZERO
11879: mov.b &z_bmask,FPSR_CC(%a6) # set Z
11880: rts
11881:
11882: fsub_zero_2_rm:
11883: fmov.s &0x80000000,%fp0 # return -ZERO
11884: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11885: rts
11886:
11887: #
11888: # one operand is a ZERO and the other is a DENORM or a NORM.
11889: # scale the DENORM or NORM and jump to the regular fsub routine.
11890: #
11891: fsub_zero_dst:
11892: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11893: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11894: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11895: bsr.l scale_to_zero_src # scale the operand
11896: clr.w FP_SCR1_EX(%a6)
11897: clr.l FP_SCR1_HI(%a6)
11898: clr.l FP_SCR1_LO(%a6)
11899: bra.w fsub_zero_entry # go execute fsub
11900:
11901: fsub_zero_src:
11902: mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11903: mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11904: mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11905: bsr.l scale_to_zero_dst # scale the operand
11906: clr.w FP_SCR0_EX(%a6)
11907: clr.l FP_SCR0_HI(%a6)
11908: clr.l FP_SCR0_LO(%a6)
11909: bra.w fsub_zero_entry # go execute fsub
11910:
11911: #
11912: # both operands are INFs. an OPERR will result if the INFs have the
11913: # same signs. else, return an INF w/ the sign of the negated src INF.
11914: #
11915: fsub_inf_2:
11916: mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11917: mov.b DST_EX(%a1),%d1
11918: eor.b %d1,%d0
11919: bpl.l res_operr # weed out (+INF)-(+INF) and (-INF)-(-INF)
11920:
11921: # ok, so it's not an OPERR. but we do have to remember to return
11922: # the src INF since that's where the 881/882 gets the j-bit.
11923:
11924: fsub_inf_src:
11925: fmovm.x SRC(%a0),&0x80 # return src INF
11926: fneg.x %fp0 # invert sign
11927: fbge.w fsub_inf_done # sign is now positive
11928: mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11929: rts
11930:
11931: fsub_inf_dst:
11932: fmovm.x DST(%a1),&0x80 # return dst INF
11933: tst.b DST_EX(%a1) # is INF negative?
11934: bpl.b fsub_inf_done # no
11935: mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11936: rts
11937:
11938: fsub_inf_done:
11939: mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11940: rts
11941:
11942: #########################################################################
11943: # XDEF **************************************************************** #
11944: # fsqrt(): emulates the fsqrt instruction #
11945: # fssqrt(): emulates the fssqrt instruction #
11946: # fdsqrt(): emulates the fdsqrt instruction #
11947: # #
11948: # XREF **************************************************************** #
11949: # scale_sqrt() - scale the source operand #
11950: # unf_res() - return default underflow result #
11951: # ovf_res() - return default overflow result #
11952: # res_qnan_1op() - return QNAN result #
11953: # res_snan_1op() - return SNAN result #
11954: # #
11955: # INPUT *************************************************************** #
11956: # a0 = pointer to extended precision source operand #
11957: # d0 rnd prec,mode #
11958: # #
11959: # OUTPUT ************************************************************** #
11960: # fp0 = result #
11961: # fp1 = EXOP (if exception occurred) #
11962: # #
11963: # ALGORITHM *********************************************************** #
11964: # Handle NANs, infinities, and zeroes as special cases. Divide #
11965: # norms/denorms into ext/sgl/dbl precision. #
11966: # For norms/denorms, scale the exponents such that a sqrt #
11967: # instruction won't cause an exception. Use the regular fsqrt to #
11968: # compute a result. Check if the regular operands would have taken #
11969: # an exception. If so, return the default overflow/underflow result #
11970: # and return the EXOP if exceptions are enabled. Else, scale the #
11971: # result operand to the proper exponent. #
11972: # #
11973: #########################################################################
11974:
11975: global fssqrt
11976: fssqrt:
11977: andi.b &0x30,%d0 # clear rnd prec
11978: ori.b &s_mode*0x10,%d0 # insert sgl precision
11979: bra.b fsqrt
11980:
11981: global fdsqrt
11982: fdsqrt:
11983: andi.b &0x30,%d0 # clear rnd prec
11984: ori.b &d_mode*0x10,%d0 # insert dbl precision
11985:
11986: global fsqrt
11987: fsqrt:
11988: mov.l %d0,L_SCR3(%a6) # store rnd info
11989: clr.w %d1 # STAG is a byte; clear hi byte of index
11990: mov.b STAG(%a6),%d1
11991: bne.w fsqrt_not_norm # optimize on non-norm input
11992:
11993: #
11994: # SQUARE ROOT: norms and denorms ONLY!
11995: #
11996: fsqrt_norm:
11997: tst.b SRC_EX(%a0) # is operand negative?
11998: bmi.l res_operr # yes
11999:
12000: andi.b &0xc0,%d0 # is precision extended?
12001: bne.b fsqrt_not_ext # no; go handle sgl or dbl
12002:
12003: fmov.l L_SCR3(%a6),%fpcr # set FPCR
12004: fmov.l &0x0,%fpsr # clear FPSR
12005:
12006: fsqrt.x (%a0),%fp0 # execute square root
12007:
12008: fmov.l %fpsr,%d1
12009: or.l %d1,USER_FPSR(%a6) # set N,INEX
12010:
12011: rts
12012:
12013: fsqrt_denorm:
12014: tst.b SRC_EX(%a0) # is operand negative?
12015: bmi.l res_operr # yes
12016:
12017: andi.b &0xc0,%d0 # is precision extended?
12018: bne.b fsqrt_not_ext # no; go handle sgl or dbl
12019:
12020: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12021: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12022: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12023:
12024: bsr.l scale_sqrt # calculate scale factor
12025:
12026: bra.w fsqrt_sd_normal # operand now scaled; treat as normal
12027:
12028: #
12029: # operand is either single or double
12030: #
12031: fsqrt_not_ext:
12032: cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12033: bne.w fsqrt_dbl
12034:
12035: #
12036: # operand is to be rounded to single precision
12037: #
12038: fsqrt_sgl:
12039: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12040: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12041: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12042:
12043: bsr.l scale_sqrt # calculate scale factor
12044:
12045: cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
12046: beq.w fsqrt_sd_may_unfl
12047: bgt.w fsqrt_sd_unfl # yes; go handle underflow
12048: cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
12049: beq.w fsqrt_sd_may_ovfl # maybe; go check
12050: blt.w fsqrt_sd_ovfl # yes; go handle overflow
12051:
12052: #
12053: # operand will NOT overflow or underflow when moved in to the fp reg file
12054: #
12055: fsqrt_sd_normal:
12056: fmov.l &0x0,%fpsr # clear FPSR
12057: fmov.l L_SCR3(%a6),%fpcr # set FPCR
12058:
12059: fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12060:
12061: fmov.l %fpsr,%d1 # save FPSR
12062: fmov.l &0x0,%fpcr # clear FPCR
12063:
12064: or.l %d1,USER_FPSR(%a6) # save INEX2,N
12065:
12066: fsqrt_sd_normal_exit:
12067: mov.l %d2,-(%sp) # save d2
12068: fmovm.x &0x80,FP_SCR0(%a6) # store out result
12069: mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12070: mov.l %d1,%d2 # make a copy
12071: andi.l &0x7fff,%d1 # strip sign
12072: sub.l %d0,%d1 # subtract scale factor
12073: andi.w &0x8000,%d2 # keep old sign
12074: or.w %d1,%d2 # concat old sign,new exp
12075: mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12076: mov.l (%sp)+,%d2 # restore d2
12077: fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12078: rts
12079:
12080: #
12081: # operand is to be rounded to double precision
12082: #
12083: fsqrt_dbl:
12084: mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12085: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12086: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12087:
12088: bsr.l scale_sqrt # calculate scale factor
12089:
12090: cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
12091: beq.w fsqrt_sd_may_unfl
12092: bgt.b fsqrt_sd_unfl # yes; go handle underflow
12093: cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
12094: beq.w fsqrt_sd_may_ovfl # maybe; go check
12095: blt.w fsqrt_sd_ovfl # yes; go handle overflow
12096: bra.w fsqrt_sd_normal # no; go handle normalized op
12097:
12098: # we're on the line here and the distinguishing characteristic is whether
12099: # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12100: # elsewise fall through to underflow.
12101: fsqrt_sd_may_unfl:
12102: btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12103: bne.w fsqrt_sd_normal # yes, so no underflow
12104:
12105: #
12106: # operand WILL underflow when moved in to the fp register file
12107: #
12108: fsqrt_sd_unfl:
12109: bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12110:
12111: fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ, ext prec
12112: fmov.l &0x0,%fpsr # clear FPSR
12113:
12114: fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
12115:
12116: fmov.l %fpsr,%d1 # save status
12117: fmov.l &0x0,%fpcr # clear FPCR
12118:
12119: or.l %d1,USER_FPSR(%a6) # save INEX2,N
12120:
12121: # if underflow or inexact is enabled, go calculate EXOP first.
12122: mov.b FPCR_ENABLE(%a6),%d1
12123: andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12124: bne.b fsqrt_sd_unfl_ena # yes
12125:
12126: fsqrt_sd_unfl_dis:
12127: fmovm.x &0x80,FP_SCR0(%a6) # store out result
12128:
12129: lea FP_SCR0(%a6),%a0 # pass: result addr
12130: mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12131: bsr.l unf_res # calculate default result
12132: or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
12133: fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12134: rts
12135:
12136: #
12137: # operand will underflow AND underflow is enabled.
12138: # therefore, we must return the result rounded to extended precision.
12139: #
12140: fsqrt_sd_unfl_ena:
12141: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12142: mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12143: mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12144:
12145: mov.l %d2,-(%sp) # save d2
12146: mov.l %d1,%d2 # make a copy
12147: andi.l &0x7fff,%d1 # strip sign
12148: andi.w &0x8000,%d2 # keep old sign
12149: sub.l %d0,%d1 # subtract scale factor
12150: addi.l &0x6000,%d1 # add new bias
12151: andi.w &0x7fff,%d1 # clear top bit
12152: or.w %d2,%d1 # concat new sign,new exp
12153: mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
12154: fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12155: mov.l (%sp)+,%d2 # restore d2
12156: bra.b fsqrt_sd_unfl_dis
12157:
12158: #
12159: # operand WILL overflow.
12160: #
12161: fsqrt_sd_ovfl:
12162: fmov.l &0x0,%fpsr # clear FPSR
12163: fmov.l L_SCR3(%a6),%fpcr # set FPCR
12164:
12165: fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12166:
12167: fmov.l &0x0,%fpcr # clear FPCR
12168: fmov.l %fpsr,%d1 # save FPSR
12169:
12170: or.l %d1,USER_FPSR(%a6) # save INEX2,N
12171:
12172: fsqrt_sd_ovfl_tst:
12173: or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12174:
12175: mov.b FPCR_ENABLE(%a6),%d1
12176: andi.b &0x13,%d1 # is OVFL or INEX enabled?
12177: bne.b fsqrt_sd_ovfl_ena # yes
12178:
12179: #
12180: # OVFL is not enabled; therefore, we must create the default result by
12181: # calling ovf_res().
12182: #
12183: fsqrt_sd_ovfl_dis:
12184: btst &neg_bit,FPSR_CC(%a6) # is result negative?
12185: sne %d1 # set sign param accordingly
12186: mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12187: bsr.l ovf_res # calculate default result
12188: or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12189: fmovm.x (%a0),&0x80 # return default result in fp0
12190: rts
12191:
12192: #
12193: # OVFL is enabled.
12194: # the INEX2 bit has already been updated by the round to the correct precision.
12195: # now, round to extended(and don't alter the FPSR).
12196: #
12197: fsqrt_sd_ovfl_ena:
12198: mov.l %d2,-(%sp) # save d2
12199: mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12200: mov.l %d1,%d2 # make a copy
12201: andi.l &0x7fff,%d1 # strip sign
12202: andi.w &0x8000,%d2 # keep old sign
12203: sub.l %d0,%d1 # subtract scale factor
12204: subi.l &0x6000,%d1 # subtract bias
12205: andi.w &0x7fff,%d1 # clear top bit
12206: or.w %d2,%d1 # concat sign,exp
12207: mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12208: fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12209: mov.l (%sp)+,%d2 # restore d2
12210: bra.b fsqrt_sd_ovfl_dis
12211:
12212: #
12213: # the move in MAY overflow. so...
12214: #
12215: fsqrt_sd_may_ovfl:
12216: btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12217: bne.w fsqrt_sd_ovfl # yes, so overflow
12218:
12219: fmov.l &0x0,%fpsr # clear FPSR
12220: fmov.l L_SCR3(%a6),%fpcr # set FPCR
12221:
12222: fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12223:
12224: fmov.l %fpsr,%d1 # save status
12225: fmov.l &0x0,%fpcr # clear FPCR
12226:
12227: or.l %d1,USER_FPSR(%a6) # save INEX2,N
12228:
12229: fmov.x %fp0,%fp1 # make a copy of result
12230: fcmp.b %fp1,&0x1 # is |result| >= 1.b?
12231: fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
12232:
12233: # no, it didn't overflow; we have correct result
12234: bra.w fsqrt_sd_normal_exit
12235:
12236: ##########################################################################
12237:
12238: #
12239: # input is not normalized; what is it?
12240: #
12241: fsqrt_not_norm:
12242: cmpi.b %d1,&DENORM # weed out DENORM
12243: beq.w fsqrt_denorm
12244: cmpi.b %d1,&ZERO # weed out ZERO
12245: beq.b fsqrt_zero
12246: cmpi.b %d1,&INF # weed out INF
12247: beq.b fsqrt_inf
12248: cmpi.b %d1,&SNAN # weed out SNAN
12249: beq.l res_snan_1op
12250: bra.l res_qnan_1op # QNAN is the only tag left
12251:
12252: #
12253: # fsqrt(+0) = +0
12254: # fsqrt(-0) = -0
12255: # fsqrt(+INF) = +INF
12256: # fsqrt(-INF) = OPERR
12257: #
12258: fsqrt_zero:
12259: tst.b SRC_EX(%a0) # is ZERO positive or negative?
12260: bmi.b fsqrt_zero_m # negative
12261: fsqrt_zero_p:
12262: fmov.s &0x00000000,%fp0 # return +ZERO
12263: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
12264: rts
12265: fsqrt_zero_m:
12266: fmov.s &0x80000000,%fp0 # return -ZERO
12267: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12268: rts
12269:
12270: fsqrt_inf:
12271: tst.b SRC_EX(%a0) # is INF positive or negative?
12272: bmi.l res_operr # negative
12273: fsqrt_inf_p:
12274: fmovm.x SRC(%a0),&0x80 # return +INF in fp0
12275: mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12276: rts
12277:
12278: #########################################################################
12279: # XDEF **************************************************************** #
12280: # fetch_dreg(): fetch register according to index in d1 #
12281: # #
12282: # XREF **************************************************************** #
12283: # None #
12284: # #
12285: # INPUT *************************************************************** #
12286: # d1 = index of register to fetch from #
12287: # #
12288: # OUTPUT ************************************************************** #
12289: # d0 = value of register fetched #
12290: # #
12291: # ALGORITHM *********************************************************** #
12292: # According to the index value in d1 which can range from zero #
12293: # to fifteen, load the corresponding register file value (where #
12294: # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12295: # stack. The rest should still be in their original places. #
12296: # #
12297: #########################################################################
12298:
12299: # this routine leaves d1 intact for subsequent store_dreg calls.
12300: global fetch_dreg
12301: fetch_dreg:
12302: mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 # fetch table offset
12303: jmp (tbl_fdreg.b,%pc,%d0.w*1) # jump to fetch routine
12304:
12305: tbl_fdreg:
12306: short fdreg0 - tbl_fdreg
12307: short fdreg1 - tbl_fdreg
12308: short fdreg2 - tbl_fdreg
12309: short fdreg3 - tbl_fdreg
12310: short fdreg4 - tbl_fdreg
12311: short fdreg5 - tbl_fdreg
12312: short fdreg6 - tbl_fdreg
12313: short fdreg7 - tbl_fdreg
12314: short fdreg8 - tbl_fdreg
12315: short fdreg9 - tbl_fdreg
12316: short fdrega - tbl_fdreg
12317: short fdregb - tbl_fdreg
12318: short fdregc - tbl_fdreg
12319: short fdregd - tbl_fdreg
12320: short fdrege - tbl_fdreg
12321: short fdregf - tbl_fdreg
12322:
12323: fdreg0:
12324: mov.l EXC_DREGS+0x0(%a6),%d0 # d0 (saved on stack frame)
12325: rts
12326: fdreg1:
12327: mov.l EXC_DREGS+0x4(%a6),%d0 # d1 (saved on stack frame)
12328: rts
12329: fdreg2:
12330: mov.l %d2,%d0
12331: rts
12332: fdreg3:
12333: mov.l %d3,%d0
12334: rts
12335: fdreg4:
12336: mov.l %d4,%d0
12337: rts
12338: fdreg5:
12339: mov.l %d5,%d0
12340: rts
12341: fdreg6:
12342: mov.l %d6,%d0
12343: rts
12344: fdreg7:
12345: mov.l %d7,%d0
12346: rts
12347: fdreg8:
12348: mov.l EXC_DREGS+0x8(%a6),%d0 # a0 (saved on stack frame)
12349: rts
12350: fdreg9:
12351: mov.l EXC_DREGS+0xc(%a6),%d0 # a1 (saved on stack frame)
12352: rts
12353: fdrega:
12354: mov.l %a2,%d0
12355: rts
12356: fdregb:
12357: mov.l %a3,%d0
12358: rts
12359: fdregc:
12360: mov.l %a4,%d0
12361: rts
12362: fdregd:
12363: mov.l %a5,%d0
12364: rts
12365: fdrege:
12366: mov.l (%a6),%d0 # a6 (saved at frame base)
12367: rts
12368: fdregf:
12369: mov.l EXC_A7(%a6),%d0 # a7 (saved on stack frame)
12370: rts
12371:
12372: #########################################################################
12373: # XDEF **************************************************************** #
12374: # store_dreg_l(): store longword to data register specified by d1 #
12375: # #
12376: # XREF **************************************************************** #
12377: # None #
12378: # #
12379: # INPUT *************************************************************** #
12380: # d0 = longword value to store #
12381: # d1 = index of register to store to #
12382: # #
12383: # OUTPUT ************************************************************** #
12384: # (data register is updated) #
12385: # #
12386: # ALGORITHM *********************************************************** #
12387: # According to the index value in d1, store the longword value #
12388: # in d0 to the corresponding data register. D0/D1 are on the stack #
12389: # while the rest are in their initial places. #
12390: # #
12391: #########################################################################
12392:
12393: global store_dreg_l
12394: store_dreg_l:
12395: mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 # fetch table offset
12396: jmp (tbl_sdregl.b,%pc,%d1.w*1) # jump to store routine
12397:
12398: tbl_sdregl:
12399: short sdregl0 - tbl_sdregl
12400: short sdregl1 - tbl_sdregl
12401: short sdregl2 - tbl_sdregl
12402: short sdregl3 - tbl_sdregl
12403: short sdregl4 - tbl_sdregl
12404: short sdregl5 - tbl_sdregl
12405: short sdregl6 - tbl_sdregl
12406: short sdregl7 - tbl_sdregl
12407:
12408: sdregl0:
12409: mov.l %d0,EXC_DREGS+0x0(%a6) # d0 (saved on stack frame)
12410: rts
12411: sdregl1:
12412: mov.l %d0,EXC_DREGS+0x4(%a6) # d1 (saved on stack frame)
12413: rts
12414: sdregl2:
12415: mov.l %d0,%d2
12416: rts
12417: sdregl3:
12418: mov.l %d0,%d3
12419: rts
12420: sdregl4:
12421: mov.l %d0,%d4
12422: rts
12423: sdregl5:
12424: mov.l %d0,%d5
12425: rts
12426: sdregl6:
12427: mov.l %d0,%d6
12428: rts
12429: sdregl7:
12430: mov.l %d0,%d7
12431: rts
12432:
12433: #########################################################################
12434: # XDEF **************************************************************** #
12435: # store_dreg_w(): store word to data register specified by d1 #
12436: # #
12437: # XREF **************************************************************** #
12438: # None #
12439: # #
12440: # INPUT *************************************************************** #
12441: # d0 = word value to store #
12442: # d1 = index of register to store to #
12443: # #
12444: # OUTPUT ************************************************************** #
12445: # (data register is updated; only the low word changes) #
12446: # #
12447: # ALGORITHM *********************************************************** #
12448: # According to the index value in d1, store the word value #
12449: # in d0 to the corresponding data register. D0/D1 are on the stack #
12450: # while the rest are in their initial places. #
12451: # #
12452: #########################################################################
12453:
12454: global store_dreg_w
12455: store_dreg_w:
12456: mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 # fetch table offset
12457: jmp (tbl_sdregw.b,%pc,%d1.w*1) # jump to store routine
12458:
12459: tbl_sdregw:
12460: short sdregw0 - tbl_sdregw
12461: short sdregw1 - tbl_sdregw
12462: short sdregw2 - tbl_sdregw
12463: short sdregw3 - tbl_sdregw
12464: short sdregw4 - tbl_sdregw
12465: short sdregw5 - tbl_sdregw
12466: short sdregw6 - tbl_sdregw
12467: short sdregw7 - tbl_sdregw
12468:
12469: sdregw0:
12470: mov.w %d0,2+EXC_DREGS+0x0(%a6) # low word of saved d0
12471: rts
12472: sdregw1:
12473: mov.w %d0,2+EXC_DREGS+0x4(%a6) # low word of saved d1
12474: rts
12475: sdregw2:
12476: mov.w %d0,%d2
12477: rts
12478: sdregw3:
12479: mov.w %d0,%d3
12480: rts
12481: sdregw4:
12482: mov.w %d0,%d4
12483: rts
12484: sdregw5:
12485: mov.w %d0,%d5
12486: rts
12487: sdregw6:
12488: mov.w %d0,%d6
12489: rts
12490: sdregw7:
12491: mov.w %d0,%d7
12492: rts
12493:
12494: #########################################################################
12495: # XDEF **************************************************************** #
12496: # store_dreg_b(): store byte to data register specified by d1 #
12497: # #
12498: # XREF **************************************************************** #
12499: # None #
12500: # #
12501: # INPUT *************************************************************** #
12502: # d0 = byte value to store #
12503: # d1 = index of register to store to #
12504: # #
12505: # OUTPUT ************************************************************** #
12506: # (data register is updated; only the low byte changes) #
12507: # #
12508: # ALGORITHM *********************************************************** #
12509: # According to the index value in d1, store the byte value #
12510: # in d0 to the corresponding data register. D0/D1 are on the stack #
12511: # while the rest are in their initial places. #
12512: # #
12513: #########################################################################
12514:
12515: global store_dreg_b
12516: store_dreg_b:
12517: mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 # fetch table offset
12518: jmp (tbl_sdregb.b,%pc,%d1.w*1) # jump to store routine
12519:
12520: tbl_sdregb:
12521: short sdregb0 - tbl_sdregb
12522: short sdregb1 - tbl_sdregb
12523: short sdregb2 - tbl_sdregb
12524: short sdregb3 - tbl_sdregb
12525: short sdregb4 - tbl_sdregb
12526: short sdregb5 - tbl_sdregb
12527: short sdregb6 - tbl_sdregb
12528: short sdregb7 - tbl_sdregb
12529:
12530: sdregb0:
12531: mov.b %d0,3+EXC_DREGS+0x0(%a6) # low byte of saved d0
12532: rts
12533: sdregb1:
12534: mov.b %d0,3+EXC_DREGS+0x4(%a6) # low byte of saved d1
12535: rts
12536: sdregb2:
12537: mov.b %d0,%d2
12538: rts
12539: sdregb3:
12540: mov.b %d0,%d3
12541: rts
12542: sdregb4:
12543: mov.b %d0,%d4
12544: rts
12545: sdregb5:
12546: mov.b %d0,%d5
12547: rts
12548: sdregb6:
12549: mov.b %d0,%d6
12550: rts
12551: sdregb7:
12552: mov.b %d0,%d7
12553: rts
12554:
12555: #########################################################################
12556: # XDEF **************************************************************** #
12557: # inc_areg(): increment an address register by the value in d0 #
12558: # #
12559: # XREF **************************************************************** #
12560: # None #
12561: # #
12562: # INPUT *************************************************************** #
12563: # d0 = amount to increment by #
12564: # d1 = index of address register to increment #
12565: # #
12566: # OUTPUT ************************************************************** #
12567: # (address register is updated) #
12568: # #
12569: # ALGORITHM *********************************************************** #
12570: # Typically used for an instruction w/ a post-increment <ea>, #
12571: # this routine adds the increment value in d0 to the address register #
12572: # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12573: # in their original places. #
12574: # For a7, if the increment amount is one, then we have to #
12575: # increment by two. For any a7 update, set the mia7_flag so that if #
12576: # an access error exception occurs later in emulation, this address #
12577: # register update can be undone. #
12578: # #
12579: #########################################################################
12580:
12581: global inc_areg
12582: inc_areg:
12583: mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 # fetch table offset
12584: jmp (tbl_iareg.b,%pc,%d1.w*1) # jump to increment routine
12585:
12586: tbl_iareg:
12587: short iareg0 - tbl_iareg
12588: short iareg1 - tbl_iareg
12589: short iareg2 - tbl_iareg
12590: short iareg3 - tbl_iareg
12591: short iareg4 - tbl_iareg
12592: short iareg5 - tbl_iareg
12593: short iareg6 - tbl_iareg
12594: short iareg7 - tbl_iareg
12595:
12596: iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # a0 (saved on stack frame)
12597: rts
12598: iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # a1 (saved on stack frame)
12599: rts
12600: iareg2: add.l %d0,%a2
12601: rts
12602: iareg3: add.l %d0,%a3
12603: rts
12604: iareg4: add.l %d0,%a4
12605: rts
12606: iareg5: add.l %d0,%a5
12607: rts
12608: iareg6: add.l %d0,(%a6) # a6 (saved at frame base)
12609: rts
12610: iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) # flag a7 update for undo
12611: cmpi.b %d0,&0x1 # byte inc of a7?
12612: beq.b iareg7b # yes; keep a7 word-aligned
12613: add.l %d0,EXC_A7(%a6)
12614: rts
12615: iareg7b:
12616: addq.l &0x2,EXC_A7(%a6) # increment by two, not one
12617: rts
12618:
12619: #########################################################################
12620: # XDEF **************************************************************** #
12621: # dec_areg(): decrement an address register by the value in d0 #
12622: # #
12623: # XREF **************************************************************** #
12624: # None #
12625: # #
12626: # INPUT *************************************************************** #
12627: # d0 = amount to decrement by #
12628: # d1 = index of address register to decrement #
12629: # #
12630: # OUTPUT ************************************************************** #
12631: # (address register is updated) #
12632: # #
12633: # ALGORITHM *********************************************************** #
12634: # Typically used for an instruction w/ a pre-decrement <ea>, #
12635: #	this routine subtracts the decrement value in d0 from the address	#
12636: # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12637: # in their original places. #
12638: # For a7, if the decrement amount is one, then we have to #
12639: # decrement by two. For any a7 update, set the mda7_flag so that if #
12640: # an access error exception occurs later in emulation, this address #
12641: # register update can be undone. #
12642: # #
12643: #########################################################################
12644:
12645: 	global		dec_areg
12646: dec_areg:
12647: 	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# fetch table offset for An (d1 = reg #)
12648: 	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch to per-register handler
12649: 
12650: tbl_dareg:
12651: 	short		dareg0 - tbl_dareg
12652: 	short		dareg1 - tbl_dareg
12653: 	short		dareg2 - tbl_dareg
12654: 	short		dareg3 - tbl_dareg
12655: 	short		dareg4 - tbl_dareg
12656: 	short		dareg5 - tbl_dareg
12657: 	short		dareg6 - tbl_dareg
12658: 	short		dareg7 - tbl_dareg
12659: 
12660: dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0 image lives in the exception frame
12661: 	rts
12662: dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1 image lives in the exception frame
12663: 	rts
12664: dareg2:	sub.l		%d0,%a2			# a2-a5 are still live; update directly
12665: 	rts
12666: dareg3:	sub.l		%d0,%a3
12667: 	rts
12668: dareg4:	sub.l		%d0,%a4
12669: 	rts
12670: dareg5:	sub.l		%d0,(%a6)		# -- see note: a5 handler
12671: 	rts
12672: dareg6:	sub.l		%d0,(%a6)		# saved a6 sits at (a6) (frame link slot)
12673: 	rts
12674: dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag "a7 modified" so the update can be undone
12675: 	cmpi.b		%d0,&0x1		# byte-sized decrement? a7 must stay word-aligned
12676: 	beq.b		dareg7b
12677: 	sub.l		%d0,EXC_A7(%a6)		# normal case: drop stacked a7 by d0
12678: 	rts
12679: dareg7b:
12680: 	subq.l		&0x2,EXC_A7(%a6)	# decrement of 1 becomes 2 for a7
12681: 	rts
12682:
12683: ##############################################################################
12684:
12685: #########################################################################
12686: # XDEF **************************************************************** #
12687: # load_fpn1(): load FP register value into FP_SRC(a6). #
12688: # #
12689: # XREF **************************************************************** #
12690: # None #
12691: # #
12692: # INPUT *************************************************************** #
12693: # d0 = index of FP register to load #
12694: # #
12695: # OUTPUT ************************************************************** #
12696: # FP_SRC(a6) = value loaded from FP register file #
12697: # #
12698: # ALGORITHM *********************************************************** #
12699: # Using the index in d0, load FP_SRC(a6) with a number from the #
12700: # FP register file. #
12701: # #
12702: #########################################################################
12703:
12704: 	global		load_fpn1
12705: load_fpn1:
12706: 	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# fetch table offset for FPn (d0 = reg #)
12707: 	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# dispatch to per-register handler
12708: 
12709: tbl_load_fpn1:
12710: 	short		load_fpn1_0 - tbl_load_fpn1
12711: 	short		load_fpn1_1 - tbl_load_fpn1
12712: 	short		load_fpn1_2 - tbl_load_fpn1
12713: 	short		load_fpn1_3 - tbl_load_fpn1
12714: 	short		load_fpn1_4 - tbl_load_fpn1
12715: 	short		load_fpn1_5 - tbl_load_fpn1
12716: 	short		load_fpn1_6 - tbl_load_fpn1
12717: 	short		load_fpn1_7 - tbl_load_fpn1
12718: 
12719: load_fpn1_0:
12720: 	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)	# fp0 image is in the exception frame;
12721: 	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)	# copy all 12 bytes (ext precision)
12722: 	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12723: 	lea		FP_SRC(%a6), %a0		# return a0 -> FP_SRC
12724: 	rts
12725: load_fpn1_1:
12726: 	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)	# fp1 image is in the exception frame
12727: 	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12728: 	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12729: 	lea		FP_SRC(%a6), %a0
12730: 	rts
12731: load_fpn1_2:
12732: 	fmovm.x		&0x20, FP_SRC(%a6)	# fp2-fp7 are still in the FPU; fmovm fp2 out
12733: 	lea		FP_SRC(%a6), %a0
12734: 	rts
12735: load_fpn1_3:
12736: 	fmovm.x		&0x10, FP_SRC(%a6)	# fp3
12737: 	lea		FP_SRC(%a6), %a0
12738: 	rts
12739: load_fpn1_4:
12740: 	fmovm.x		&0x08, FP_SRC(%a6)	# fp4
12741: 	lea		FP_SRC(%a6), %a0
12742: 	rts
12743: load_fpn1_5:
12744: 	fmovm.x		&0x04, FP_SRC(%a6)	# fp5
12745: 	lea		FP_SRC(%a6), %a0
12746: 	rts
12747: load_fpn1_6:
12748: 	fmovm.x		&0x02, FP_SRC(%a6)	# fp6
12749: 	lea		FP_SRC(%a6), %a0
12750: 	rts
12751: load_fpn1_7:
12752: 	fmovm.x		&0x01, FP_SRC(%a6)	# fp7
12753: 	lea		FP_SRC(%a6), %a0
12754: 	rts
12755:
12756: #############################################################################
12757:
12758: #########################################################################
12759: # XDEF **************************************************************** #
12760: # load_fpn2(): load FP register value into FP_DST(a6). #
12761: # #
12762: # XREF **************************************************************** #
12763: # None #
12764: # #
12765: # INPUT *************************************************************** #
12766: # d0 = index of FP register to load #
12767: # #
12768: # OUTPUT ************************************************************** #
12769: # FP_DST(a6) = value loaded from FP register file #
12770: # #
12771: # ALGORITHM *********************************************************** #
12772: # Using the index in d0, load FP_DST(a6) with a number from the #
12773: # FP register file. #
12774: # #
12775: #########################################################################
12776:
12777: 	global		load_fpn2
12778: load_fpn2:
12779: 	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# fetch table offset for FPn (d0 = reg #)
12780: 	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# dispatch to per-register handler
12781: 
12782: tbl_load_fpn2:
12783: 	short		load_fpn2_0 - tbl_load_fpn2
12784: 	short		load_fpn2_1 - tbl_load_fpn2
12785: 	short		load_fpn2_2 - tbl_load_fpn2
12786: 	short		load_fpn2_3 - tbl_load_fpn2
12787: 	short		load_fpn2_4 - tbl_load_fpn2
12788: 	short		load_fpn2_5 - tbl_load_fpn2
12789: 	short		load_fpn2_6 - tbl_load_fpn2
12790: 	short		load_fpn2_7 - tbl_load_fpn2
12791: 
12792: load_fpn2_0:
12793: 	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)	# fp0 image is in the exception frame;
12794: 	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)	# copy all 12 bytes (ext precision)
12795: 	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
12796: 	lea		FP_DST(%a6), %a0		# return a0 -> FP_DST
12797: 	rts
12798: load_fpn2_1:
12799: 	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)	# fp1 image is in the exception frame
12800: 	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
12801: 	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
12802: 	lea		FP_DST(%a6), %a0
12803: 	rts
12804: load_fpn2_2:
12805: 	fmovm.x		&0x20, FP_DST(%a6)	# fp2-fp7 are still in the FPU; fmovm fp2 out
12806: 	lea		FP_DST(%a6), %a0
12807: 	rts
12808: load_fpn2_3:
12809: 	fmovm.x		&0x10, FP_DST(%a6)	# fp3
12810: 	lea		FP_DST(%a6), %a0
12811: 	rts
12812: load_fpn2_4:
12813: 	fmovm.x		&0x08, FP_DST(%a6)	# fp4
12814: 	lea		FP_DST(%a6), %a0
12815: 	rts
12816: load_fpn2_5:
12817: 	fmovm.x		&0x04, FP_DST(%a6)	# fp5
12818: 	lea		FP_DST(%a6), %a0
12819: 	rts
12820: load_fpn2_6:
12821: 	fmovm.x		&0x02, FP_DST(%a6)	# fp6
12822: 	lea		FP_DST(%a6), %a0
12823: 	rts
12824: load_fpn2_7:
12825: 	fmovm.x		&0x01, FP_DST(%a6)	# fp7
12826: 	lea		FP_DST(%a6), %a0
12827: 	rts
12828:
12829: #############################################################################
12830:
12831: #########################################################################
12832: # XDEF **************************************************************** #
12833: # store_fpreg(): store an fp value to the fpreg designated d0. #
12834: # #
12835: # XREF **************************************************************** #
12836: # None #
12837: # #
12838: # INPUT *************************************************************** #
12839: # fp0 = extended precision value to store #
12840: # d0 = index of floating-point register #
12841: # #
12842: # OUTPUT ************************************************************** #
12843: # None #
12844: # #
12845: # ALGORITHM *********************************************************** #
12846: # Store the value in fp0 to the FP register designated by the #
12847: # value in d0. The FP number can be DENORM or SNAN so we have to be #
12848: # careful that we don't take an exception here. #
12849: # #
12850: #########################################################################
12851:
12852: 	global		store_fpreg
12853: store_fpreg:
12854: 	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# fetch table offset for FPn (d0 = reg #)
12855: 	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# dispatch to per-register handler
12856: 
12857: tbl_store_fpreg:
12858: 	short		store_fpreg_0 - tbl_store_fpreg
12859: 	short		store_fpreg_1 - tbl_store_fpreg
12860: 	short		store_fpreg_2 - tbl_store_fpreg
12861: 	short		store_fpreg_3 - tbl_store_fpreg
12862: 	short		store_fpreg_4 - tbl_store_fpreg
12863: 	short		store_fpreg_5 - tbl_store_fpreg
12864: 	short		store_fpreg_6 - tbl_store_fpreg
12865: 	short		store_fpreg_7 - tbl_store_fpreg
12866: 
12867: # fp0/fp1 have stacked images; dump fp0 there. fmovm never takes an
12868: # exception, so a DENORM or SNAN in fp0 is moved safely.
12869: store_fpreg_0:
12870: 	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> frame image of fp0
12871: 	rts
12872: store_fpreg_1:
12873: 	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> frame image of fp1
12874: 	rts
12875: # fp2-fp7 are live in the FPU: bounce fp0 through the stack with fmovm
12876: # (push fp0, then pop into the destination register) so no arithmetic
12877: # move is done that could trap on the operand.
12878: store_fpreg_2:
12879: 	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
12880: 	fmovm.x		(%sp)+, &0x20		# pop into fp2
12881: 	rts
12882: store_fpreg_3:
12883: 	fmovm.x		&0x01, -(%sp)
12884: 	fmovm.x		(%sp)+, &0x10		# pop into fp3
12885: 	rts
12886: store_fpreg_4:
12887: 	fmovm.x		&0x01, -(%sp)
12888: 	fmovm.x		(%sp)+, &0x08		# pop into fp4
12889: 	rts
12890: store_fpreg_5:
12891: 	fmovm.x		&0x01, -(%sp)
12892: 	fmovm.x		(%sp)+, &0x04		# pop into fp5
12893: 	rts
12894: store_fpreg_6:
12895: 	fmovm.x		&0x01, -(%sp)
12896: 	fmovm.x		(%sp)+, &0x02		# pop into fp6
12897: 	rts
12898: store_fpreg_7:
12899: 	fmovm.x		&0x01, -(%sp)
12900: 	fmovm.x		(%sp)+, &0x01		# pop into fp7
12901: 	rts
12897:
12898: #########################################################################
12899: # XDEF **************************************************************** #
12900: # get_packed(): fetch a packed operand from memory and then #
12901: # convert it to a floating-point binary number. #
12902: # #
12903: # XREF **************************************************************** #
12904: # _dcalc_ea() - calculate the correct <ea> #
12905: # _mem_read() - fetch the packed operand from memory #
12906: # facc_in_x() - the fetch failed so jump to special exit code #
12907: # decbin() - convert packed to binary extended precision #
12908: # #
12909: # INPUT *************************************************************** #
12910: # None #
12911: # #
12912: # OUTPUT ************************************************************** #
12913: # If no failure on _mem_read(): #
12914: # FP_SRC(a6) = packed operand now as a binary FP number #
12915: # #
12916: # ALGORITHM *********************************************************** #
12917: # Get the correct <ea> which is the value on the exception stack #
12918: # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12919: # Then, fetch the operand from memory. If the fetch fails, exit #
12920: # through facc_in_x(). #
12921: #	If the packed operand is a ZERO, NAN, or INF, convert it to	#
12922: # its binary representation here. Else, call decbin() which will #
12923: # convert the packed value to an extended precision binary value. #
12924: # #
12925: #########################################################################
12926:
12927: # the stacked <ea> for packed is correct except for -(An).
12928: # the base reg must be updated for both -(An) and (An)+.
12929: 	global		get_packed
12930: get_packed:
12931: 	mov.l		&0xc,%d0	# packed is 12 bytes
12932: 	bsr.l		_dcalc_ea	# fetch <ea>; correct An for -(An)/(An)+
12933: 
12934: 	lea		FP_SRC(%a6),%a1	# pass: ptr to super dst
12935: 	mov.l		&0xc,%d0	# pass: 12 bytes
12936: 	bsr.l		_dmem_read	# read packed operand
12937: 
12938: 	tst.l		%d1		# did dfetch fail?
12939: 	bne.l		facc_in_x	# yes: take the access-error exit
12940: 
12941: # The packed operand is an INF or a NAN if the exponent field is all ones.
12942: 	bfextu		FP_SRC(%a6){&1:&15},%d0 # get exp (skip the sign bit)
12943: 	cmpi.w		%d0,&0x7fff	# INF or NAN?
12944: 	bne.b		gp_try_zero	# no
12945: 	rts			# operand is an INF or NAN; leave as-is
12946: 
12947: # The packed operand is a zero if the mantissa is all zero, else it's
12948: # a normal packed op.
12949: gp_try_zero:
12950: 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (holds the integer digit)
12951: 	andi.b		&0x0f,%d0	# clear all but last nybble
12952: 	bne.b		gp_not_spec	# not a zero
12953: 	tst.l		FP_SRC_HI(%a6)	# is lw 2 zero?
12954: 	bne.b		gp_not_spec	# not a zero
12955: 	tst.l		FP_SRC_LO(%a6)	# is lw 3 zero?
12956: 	bne.b		gp_not_spec	# not a zero
12957: 	rts			# operand is a ZERO; leave as-is
12958: gp_not_spec:
12959: 	lea		FP_SRC(%a6),%a0	# pass: ptr to packed op
12960: 	bsr.l		decbin		# convert to extended (result in fp0)
12961: 	fmovm.x		&0x80,FP_SRC(%a6) # make this the srcop
12962: 	rts
12963:
12964: #########################################################################
12965: # decbin(): Converts normalized packed bcd value pointed to by register #
12966: # a0 to extended-precision value in fp0. #
12967: # #
12968: # INPUT *************************************************************** #
12969: # a0 = pointer to normalized packed bcd value #
12970: # #
12971: # OUTPUT ************************************************************** #
12972: # fp0 = exact fp representation of the packed bcd value. #
12973: # #
12974: # ALGORITHM *********************************************************** #
12975: # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12976: # and NaN operands are dispatched without entering this routine) #
12977: # value in 68881/882 format at location (a0). #
12978: # #
12979: # A1. Convert the bcd exponent to binary by successive adds and #
12980: # muls. Set the sign according to SE. Subtract 16 to compensate #
12981: # for the mantissa which is to be interpreted as 17 integer #
12982: # digits, rather than 1 integer and 16 fraction digits. #
12983: # Note: this operation can never overflow. #
12984: # #
12985: # A2. Convert the bcd mantissa to binary by successive #
12986: # adds and muls in FP0. Set the sign according to SM. #
12987: # The mantissa digits will be converted with the decimal point #
12988: # assumed following the least-significant digit. #
12989: # Note: this operation can never overflow. #
12990: # #
12991: # A3. Count the number of leading/trailing zeros in the #
12992: # bcd string. If SE is positive, count the leading zeros; #
12993: # if negative, count the trailing zeros. Set the adjusted #
12994: # exponent equal to the exponent from A1 and the zero count #
12995: # added if SM = 1 and subtracted if SM = 0. Scale the #
12996: # mantissa the equivalent of forcing in the bcd value: #
12997: # #
12998: # SM = 0 a non-zero digit in the integer position #
12999: # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
13000: # #
13001: # this will insure that any value, regardless of its #
13002: # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
13003: # consistently. #
13004: # #
13005: # A4. Calculate the factor 10^exp in FP1 using a table of #
13006: # 10^(2^n) values. To reduce the error in forming factors #
13007: # greater than 10^27, a directed rounding scheme is used with #
13008: # tables rounded to RN, RM, and RP, according to the table #
13009: # in the comments of the pwrten section. #
13010: # #
13011: # A5. Form the final binary number by scaling the mantissa by #
13012: # the exponent factor. This is done by multiplying the #
13013: # mantissa in FP0 by the factor in FP1 if the adjusted #
13014: # exponent sign is positive, and dividing FP0 by FP1 if #
13015: # it is negative. #
13016: # #
13017: # Clean up and return. Check if the final mul or div was inexact. #
13018: # If so, set INEX1 in USER_FPSR. #
13019: # #
13020: #########################################################################
13021:
13022: #
13023: # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13024: # to nearest, minus, and plus, respectively. The tables include
13025: # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13026: # is required until the power is greater than 27, however, all
13027: # tables include the first 5 for ease of indexing.
13028: #
# RTABLE: rounding-mode remap used by pwrten. Indexed by
# {FPCR[6:5] rounding mode, SM, SE} (4 entries per mode).
# Entry values select the power-of-ten table: 0 = RN, 2 = RM, 3 = RP
# (decoded bit-by-bit in pwrten via asr/bcc).
13029: RTABLE:
13030: 	byte		0,0,0,0
13031: 	byte		2,3,2,3
13032: 	byte		2,3,3,2
13033: 	byte		3,2,2,3
13034: 
13035: 	set		FNIBS,7		# dbf count: 8 digits per mantissa lword
13036: 	set		FSTRT,0		# bit offset of first fraction digit in lword
13037: 
13038: 	set		ESTRT,4		# bit offset of first exponent digit in word 1
13039: 	set		EDIGITS,2	# dbf count: 3 exponent digits
13040:
13041: 	global		decbin
13042: decbin:
13043: 	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13044: 	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13045: 	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
13046: 
13047: 	lea		FP_SCR0(%a6),%a0	# a0 now -> working copy
13048: 
13049: 	movm.l		&0x3c00,-(%sp)	# save d2-d5
13050: 	fmovm.x		&0x1,-(%sp)	# save fp1
13051: #
13052: # Calculate exponent:
13053: #  1. Copy bcd value in memory for use as a working copy.
13054: #  2. Calculate absolute value of exponent in d1 by mul and add.
13055: #  3. Correct for exponent sign.
13056: #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13057: #     (i.e., all digits assumed left of the decimal point.)
13058: #
13059: # Register usage:
13060: #
13061: #  calc_e:
13062: #	(*)  d0: temp digit storage
13063: #	(*)  d1: accumulator for binary exponent
13064: #	(*)  d2: digit count
13065: #	(*)  d3: offset pointer
13066: #	( )  d4: first word of bcd
13067: #	( )  a0: pointer to working bcd value
13068: #	( )  a6: pointer to original bcd value
13069: #	(*)  FP_SCR1: working copy of original bcd value
13070: #	(*)  L_SCR1: copy of original exponent word
13071: #
13072: calc_e:
13073: 	mov.l		&EDIGITS,%d2	# # of nibbles (digits) in fraction part
13074: 	mov.l		&ESTRT,%d3	# counter to pick up digits
13075: 	mov.l		(%a0),%d4	# get first word of bcd
13076: 	clr.l		%d1		# zero d1 for accumulator
13077: e_gd:
13078: 	mulu.l		&0xa,%d1	# mul partial product by one digit place
13079: 	bfextu		%d4{%d3:&4},%d0	# get the digit and zero extend into d0
13080: 	add.l		%d0,%d1		# d1 = d1 + d0
13081: 	addq.b		&4,%d3		# advance d3 to the next digit
13082: 	dbf.w		%d2,e_gd	# if we have used all 3 digits, exit loop
13083: 	btst		&30,%d4		# get SE (exponent sign)
13084: 	beq.b		e_pos		# don't negate if pos
13085: 	neg.l		%d1		# negate before subtracting
13086: e_pos:
13087: 	sub.l		&16,%d1		# sub to compensate for shift of mant
13088: 	bge.b		e_save		# if still pos, do not neg
13089: 	neg.l		%d1		# now negative, make pos and set SE
13090: 	or.l		&0x40000000,%d4	# set SE in d4,
13091: 	or.l		&0x40000000,(%a0) # and in working bcd
13092: e_save:
13093: 	mov.l		%d1,-(%sp)	# save exp on stack
13094: #
13095: #
13096: # Calculate mantissa:
13097: #  1. Calculate absolute value of mantissa in fp0 by mul and add.
13098: #  2. Correct for mantissa sign.
13099: #     (i.e., all digits assumed left of the decimal point.)
13100: #
13101: # Register usage:
13102: #
13103: #  calc_m:
13104: #	(*)  d0: temp digit storage
13105: #	(*)  d1: lword counter
13106: #	(*)  d2: digit count
13107: #	(*)  d3: offset pointer
13108: #	( )  d4: words 2 and 3 of bcd
13109: #	( )  a0: pointer to working bcd value
13110: #	( )  a6: pointer to original bcd value
13111: #	(*) fp0: mantissa accumulator
13112: #	( )  FP_SCR1: working copy of original bcd value
13113: #	( )  L_SCR1: copy of original exponent word
13114: #
13115: calc_m:
13116: 	mov.l		&1,%d1		# word counter, init to 1
13117: 	fmov.s		&0x00000000,%fp0 # accumulator
13118: #
13119: #
13120: # Since the packed number has a long word between the first & second parts,
13121: # get the integer digit then skip down & get the rest of the
13122: # mantissa.  We will unroll the loop once.
13123: #
13124: 	bfextu		(%a0){&28:&4},%d0 # integer part is ls digit in long word
13125: 	fadd.b		%d0,%fp0	# add digit to sum in fp0
13126: #
13127: #
13128: # Get the rest of the mantissa.
13129: #
13130: loadlw:
13131: 	mov.l		(%a0,%d1.L*4),%d4 # load mantissa longword into d4
13132: 	mov.l		&FSTRT,%d3	# counter to pick up digits
13133: 	mov.l		&FNIBS,%d2	# reset number of digits per a0 ptr
13134: md2b:
13135: 	fmul.s		&0x41200000,%fp0 # fp0 = fp0 * 10
13136: 	bfextu		%d4{%d3:&4},%d0	# get the digit and zero extend
13137: 	fadd.b		%d0,%fp0	# fp0 = fp0 + digit
13138: #
13139: #
13140: # If all the digits (8) in that long word have been converted (d2=0),
13141: # then inc d1 (=2) to point to the next long word and reset d3 to 0
13142: # to initialize the digit offset, and set d2 to 7 for the digit count;
13143: # else continue with this long word.
13144: #
13145: 	addq.b		&4,%d3		# advance d3 to the next digit
13146: 	dbf.w		%d2,md2b	# check for last digit in this lw
13147: nextlw:
13148: 	addq.l		&1,%d1		# inc lw pointer in mantissa
13149: 	cmp.l		%d1,&2		# test for last lw
13150: 	ble.b		loadlw		# if not, get last one
13151: #
13152: # Check the sign of the mant and make the value in fp0 the same sign.
13153: #
13154: m_sign:
13155: 	btst		&31,(%a0)	# test sign of the mantissa
13156: 	beq.b		ap_st_z		# if clear, go to append/strip zeros
13157: 	fneg.x		%fp0		# if set, negate fp0
13158: #
13159: # Append/strip zeros:
13160: #
13161: # For adjusted exponents which have an absolute value greater than 27*,
13162: # this routine calculates the amount needed to normalize the mantissa
13163: # for the adjusted exponent.  That number is subtracted from the exp
13164: # if the exp was positive, and added if it was negative.  The purpose
13165: # of this is to reduce the value of the exponent and the possibility
13166: # of error in calculation of pwrten.
13167: #
13168: # 1. Branch on the sign of the adjusted exponent.
13169: # 2p.(positive exp)
13170: #  2. Check M16 and the digits in lwords 2 and 3 in descending order.
13171: #  3. Add one for each zero encountered until a non-zero digit.
13172: #  4. Subtract the count from the exp.
13173: #  5. Check if the exp has crossed zero in #3 above; make the exp abs
13174: #     and set SE.
13175: #  6. Multiply the mantissa by 10**count.
13176: # 2n.(negative exp)
13177: #  2. Check the digits in lwords 3 and 2 in descending order.
13178: #  3. Add one for each zero encountered until a non-zero digit.
13179: #  4. Add the count to the exp.
13180: #  5. Check if the exp has crossed zero in #3 above; clear SE.
13181: #  6. Divide the mantissa by 10**count.
13182: #
13183: # *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13184: #  any adjustment due to append/strip zeros will drive the resultant
13185: #  exponent towards zero.  Since all pwrten constants with a power
13186: #  of 27 or less are exact, there is no need to use this routine to
13187: #  attempt to lessen the resultant exponent.
13188: #
13189: # Register usage:
13190: #
13191: #  ap_st_z:
13192: #	(*)  d0: temp digit storage
13193: #	(*)  d1: zero count
13194: #	(*)  d2: digit count
13195: #	(*)  d3: offset pointer
13196: #	( )  d4: first word of bcd
13197: #	(*)  d5: lword counter
13198: #	( )  a0: pointer to working bcd value
13199: #	( )  FP_SCR1: working copy of original bcd value
13200: #	( )  L_SCR1: copy of original exponent word
13201: #
13202: #
13203: # First check the absolute value of the exponent to see if this
13204: # routine is necessary.  If so, then check the sign of the exponent
13205: # and do append (+) or strip (-) zeros accordingly.
13206: # This section handles a positive adjusted exponent.
13207: #
13208: ap_st_z:
13209: 	mov.l		(%sp),%d1	# load expA for range test
13210: 	cmp.l		%d1,&27		# test is with 27
13211: 	ble.w		pwrten		# if abs(expA) <28, skip ap/st zeros
13212: 	btst		&30,(%a0)	# check sign of exp
13213: 	bne.b		ap_st_n		# if neg, go to neg side
13214: 	clr.l		%d1		# zero count reg
13215: 	mov.l		(%a0),%d4	# load lword 1 to d4
13216: 	bfextu		%d4{&28:&4},%d0	# get M16 in d0
13217: 	bne.b		ap_p_fx		# if M16 is non-zero, go fix exp
13218: 	addq.l		&1,%d1		# inc zero count
13219: 	mov.l		&1,%d5		# init lword counter
13220: 	mov.l		(%a0,%d5.L*4),%d4 # get lword 2 to d4
13221: 	bne.b		ap_p_cl		# if lw 2 is non-zero, go check its digits
13222: 	addq.l		&8,%d1		# lw 2 all zero: inc count by 8
13223: 	addq.l		&1,%d5		# inc lword counter
13224: 	mov.l		(%a0,%d5.L*4),%d4 # get lword 3 to d4
13225: ap_p_cl:
13226: 	clr.l		%d3		# init offset reg
13227: 	mov.l		&7,%d2		# init digit counter
13228: ap_p_gd:
13229: 	bfextu		%d4{%d3:&4},%d0	# get digit
13230: 	bne.b		ap_p_fx		# if non-zero, go to fix exp
13231: 	addq.l		&4,%d3		# point to next digit
13232: 	addq.l		&1,%d1		# inc digit counter
13233: 	dbf.w		%d2,ap_p_gd	# get next digit
13234: ap_p_fx:
13235: 	mov.l		%d1,%d0		# copy counter to d0
13236: 	mov.l		(%sp),%d1	# get adjusted exp from memory
13237: 	sub.l		%d0,%d1		# subtract count from exp
13238: 	bge.b		ap_p_fm		# if still pos, go to pwrten
13239: 	neg.l		%d1		# now its neg; get abs
13240: 	mov.l		(%a0),%d4	# load lword 1 to d4
13241: 	or.l		&0x40000000,%d4	# and set SE in d4
13242: 	or.l		&0x40000000,(%a0) # and in memory
13243: #
13244: # Calculate the mantissa multiplier to compensate for the stripping of
13245: # zeros from the mantissa.
13246: #
13247: ap_p_fm:
13248: 	lea.l		PTENRN(%pc),%a1	# get address of power-of-ten table
13249: 	clr.l		%d3		# init table index
13250: 	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
13251: 	mov.l		&3,%d2		# init d2 to count bits in counter
13252: ap_p_el:
13253: 	asr.l		&1,%d0		# shift lsb into carry
13254: 	bcc.b		ap_p_en		# if carry clear (bit was 0), skip the mul
13255: 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13256: ap_p_en:
13257: 	add.l		&12,%d3		# inc d3 to next rtable entry
13258: 	tst.l		%d0		# check if d0 is zero
13259: 	bne.b		ap_p_el		# if not, get next bit
13260: 	fmul.x		%fp1,%fp0	# mul mantissa by 10**(no_bits_shifted)
13261: 	bra.b		pwrten		# go calc pwrten
13262: #
13263: # This section handles a negative adjusted exponent.
13264: #
13265: ap_st_n:
13266: 	clr.l		%d1		# clr counter
13267: 	mov.l		&2,%d5		# set up d5 to point to lword 3
13268: 	mov.l		(%a0,%d5.L*4),%d4 # get lword 3
13269: 	bne.b		ap_n_cl		# if not zero, check digits
13270: 	sub.l		&1,%d5		# dec d5 to point to lword 2
13271: 	addq.l		&8,%d1		# inc counter by 8
13272: 	mov.l		(%a0,%d5.L*4),%d4 # get lword 2
13273: ap_n_cl:
13274: 	mov.l		&28,%d3		# point to last digit
13275: 	mov.l		&7,%d2		# init digit counter
13276: ap_n_gd:
13277: 	bfextu		%d4{%d3:&4},%d0	# get digit
13278: 	bne.b		ap_n_fx		# if non-zero, go to exp fix
13279: 	subq.l		&4,%d3		# point to previous digit
13280: 	addq.l		&1,%d1		# inc digit counter
13281: 	dbf.w		%d2,ap_n_gd	# get next digit
13282: ap_n_fx:
13283: 	mov.l		%d1,%d0		# copy counter to d0
13284: 	mov.l		(%sp),%d1	# get adjusted exp from memory
13285: 	sub.l		%d0,%d1		# subtract count from exp
13286: 	bgt.b		ap_n_fm		# if still pos, go fix mantissa
13287: 	neg.l		%d1		# take abs of exp and clr SE
13288: 	mov.l		(%a0),%d4	# load lword 1 to d4
13289: 	and.l		&0xbfffffff,%d4	# and clr SE in d4
13290: 	and.l		&0xbfffffff,(%a0) # and in memory
13291: #
13292: # Calculate the mantissa multiplier to compensate for the appending of
13293: # zeros to the mantissa.
13294: #
13295: ap_n_fm:
13296: 	lea.l		PTENRN(%pc),%a1	# get address of power-of-ten table
13297: 	clr.l		%d3		# init table index
13298: 	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
13299: 	mov.l		&3,%d2		# init d2 to count bits in counter
13300: ap_n_el:
13301: 	asr.l		&1,%d0		# shift lsb into carry
13302: 	bcc.b		ap_n_en		# if carry clear (bit was 0), skip the mul
13303: 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13304: ap_n_en:
13305: 	add.l		&12,%d3		# inc d3 to next rtable entry
13306: 	tst.l		%d0		# check if d0 is zero
13307: 	bne.b		ap_n_el		# if not, get next bit
13308: 	fdiv.x		%fp1,%fp0	# div mantissa by 10**(no_bits_shifted)
13309: #
13310: #
13311: # Calculate power-of-ten factor from adjusted and shifted exponent.
13312: #
13313: # Register usage:
13314: #
13315: #  pwrten:
13316: #	(*)  d0: temp
13317: #	( )  d1: exponent
13318: #	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13319: #	(*)  d3: FPCR work copy
13320: #	( )  d4: first word of bcd
13321: #	(*)  a1: RTABLE pointer
13322: #  calc_p:
13323: #	(*)  d0: temp
13324: #	( )  d1: exponent
13325: #	(*)  d3: PWRTxx table index
13326: #	( )  a0: pointer to working copy of bcd
13327: #	(*)  a1: PWRTxx pointer
13328: #	(*) fp1: power-of-ten accumulator
13329: #
13330: # Pwrten calculates the exponent factor in the selected rounding mode
13331: # according to the following table:
13332: #
13333: #	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13334: #
13335: #	ANY	      ANY	   RN		  RN
13336: #
13337: #	 +	       +	   RP		  RP
13338: #	 -	       +	   RP		  RM
13339: #	 +	       -	   RP		  RM
13340: #	 -	       -	   RP		  RP
13341: #
13342: #	 +	       +	   RM		  RM
13343: #	 -	       +	   RM		  RP
13344: #	 +	       -	   RM		  RP
13345: #	 -	       -	   RM		  RM
13346: #
13347: #	 +	       +	   RZ		  RM
13348: #	 -	       +	   RZ		  RM
13349: #	 +	       -	   RZ		  RP
13350: #	 -	       -	   RZ		  RP
13351: #
13352: #
13353: pwrten:
13354: 	mov.l		USER_FPCR(%a6),%d3 # get user's FPCR
13355: 	bfextu		%d3{&26:&2},%d2	# isolate rounding mode bits
13356: 	mov.l		(%a0),%d4	# reload 1st bcd word to d4
13357: 	asl.l		&2,%d2		# format d2 to be
13358: 	bfextu		%d4{&0:&2},%d0	# {FPCR[6],FPCR[5],SM,SE}
13359: 	add.l		%d0,%d2		# in d2 as index into RTABLE
13360: 	lea.l		RTABLE(%pc),%a1	# load rtable base
13361: 	mov.b		(%a1,%d2),%d0	# load new rounding bits from table
13362: 	clr.l		%d3		# clear d3 to force no exc and extended
13363: 	bfins		%d0,%d3{&26:&2}	# stuff new rounding bits in FPCR
13364: 	fmov.l		%d3,%fpcr	# write new FPCR
13365: 	asr.l		&1,%d0		# decode table value to pick PTENxx table:
13366: 	bcc.b		not_rp		# bit0 set means RP
13367: 	lea.l		PTENRP(%pc),%a1	# it is RP
13368: 	bra.b		calc_p		# go to init section
13369: not_rp:
13370: 	asr.l		&1,%d0		# keep checking
13371: 	bcc.b		not_rm		# bit1 set means RM; neither set means RN
13372: 	lea.l		PTENRM(%pc),%a1	# it is RM
13373: 	bra.b		calc_p		# go to init section
13374: not_rm:
13375: 	lea.l		PTENRN(%pc),%a1	# it is RN
13376: calc_p:
13377: 	mov.l		%d1,%d0		# copy exp to d0;use d0
13378: 	bpl.b		no_neg		# if exp is negative,
13379: 	neg.l		%d0		# invert it
13380: 	or.l		&0x40000000,(%a0) # and set SE bit
13381: no_neg:
13382: 	clr.l		%d3		# table index
13383: 	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
13384: e_loop:
13385: 	asr.l		&1,%d0		# shift next bit into carry
13386: 	bcc.b		e_next		# if zero, skip the mul
13387: 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13388: e_next:
13389: 	add.l		&12,%d3		# inc d3 to next rtable entry
13390: 	tst.l		%d0		# check if d0 is zero
13391: 	bne.b		e_loop		# not zero, continue shifting
13392: #
13393: #
13394: # Check the sign of the adjusted exp and make the value in fp0 the
13395: # same sign. If the exp was pos then multiply fp1*fp0;
13396: # else divide fp0/fp1.
13397: #
13398: # Register Usage:
13399: #  norm:
13400: #	( )  a0: pointer to working bcd value
13401: #	(*) fp0: mantissa accumulator
13402: #	( )  fp1: scaling factor - 10**(abs(exp))
13403: #
13404: pnorm:
13405: 	btst		&30,(%a0)	# test the sign of the exponent
13406: 	beq.b		mul		# if clear, go to multiply
13407: div:
13408: 	fdiv.x		%fp1,%fp0	# exp is negative, so divide mant by exp
13409: 	bra.b		end_dec
13410: mul:
13411: 	fmul.x		%fp1,%fp0	# exp is positive, so multiply by exp
13412: #
13413: #
13414: # Clean up and return with result in fp0.
13415: #
13416: # If the final mul/div in decbin incurred an inex exception,
13417: # it will be inex2, but will be reported as inex1 by get_op.
13418: #
13419: end_dec:
13420: 	fmov.l		%fpsr,%d0	# get status register
13421: 	bclr		&inex2_bit+8,%d0 # test for inex2 and clear it
13422: 	beq.b		no_exc		# skip this if no exc
13423: 	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13424: no_exc:
13425: 	add.l		&0x4,%sp	# clear 1 lw param (saved exponent)
13426: 	fmovm.x		(%sp)+,&0x40	# restore fp1
13427: 	movm.l		(%sp)+,&0x3c	# restore d2-d5
13428: 	fmov.l		&0x0,%fpcr	# reset control/status so caller sees a
13429: 	fmov.l		&0x0,%fpsr	# clean FPU state
13430: 	rts
13431:
13432: #########################################################################
13433: # bindec(): Converts an input in extended precision format to bcd format#
13434: # #
13435: # INPUT *************************************************************** #
13436: # a0 = pointer to the input extended precision value in memory. #
13437: # the input may be either normalized, unnormalized, or #
13438: # denormalized. #
13439: # d0 = contains the k-factor sign-extended to 32-bits. #
13440: # #
13441: # OUTPUT ************************************************************** #
13442: # FP_SCR0(a6) = bcd format result on the stack. #
13443: # #
13444: # ALGORITHM *********************************************************** #
13445: # #
13446: # A1. Set RM and size ext; Set SIGMA = sign of input. #
13447: # The k-factor is saved for use in d7. Clear the #
13448: # BINDEC_FLG for separating normalized/denormalized #
13449: # input. If input is unnormalized or denormalized, #
13450: # normalize it. #
13451: # #
13452: # A2. Set X = abs(input). #
13453: # #
13454: # A3. Compute ILOG. #
13455: # ILOG is the log base 10 of the input value. It is #
13456: # approximated by adding e + 0.f when the original #
13457: # value is viewed as 2^^e * 1.f in extended precision. #
13458: # This value is stored in d6. #
13459: # #
13460: # A4. Clr INEX bit. #
13461: # The operation in A3 above may have set INEX2. #
13462: # #
13463: # A5. Set ICTR = 0; #
13464: # ICTR is a flag used in A13. It must be set before the #
13465: # loop entry A6. #
13466: # #
13467: # A6. Calculate LEN. #
13468: # LEN is the number of digits to be displayed. The #
13469: # k-factor can dictate either the total number of digits, #
13470: # if it is a positive number, or the number of digits #
13471: # after the decimal point which are to be included as #
13472: # significant. See the 68882 manual for examples. #
13473: # If LEN is computed to be greater than 17, set OPERR in #
13474: # USER_FPSR. LEN is stored in d4. #
13475: # #
13476: # A7. Calculate SCALE. #
13477: # SCALE is equal to 10^ISCALE, where ISCALE is the number #
13478: # of decimal places needed to insure LEN integer digits #
13479: # in the output before conversion to bcd. LAMBDA is the #
13480: # sign of ISCALE, used in A9. Fp1 contains #
13481: # 10^^(abs(ISCALE)) using a rounding mode which is a #
13482: # function of the original rounding mode and the signs #
13483: # of ISCALE and X. A table is given in the code. #
13484: # #
13485: # A8. Clr INEX; Force RZ. #
13486: # The operation in A3 above may have set INEX2. #
13487: # RZ mode is forced for the scaling operation to insure #
13488: # only one rounding error. The grs bits are collected in #
13489: # the INEX flag for use in A10. #
13490: # #
13491: # A9. Scale X -> Y. #
13492: # The mantissa is scaled to the desired number of #
13493: # significant digits. The excess digits are collected #
13494: # in INEX2. #
13495: # #
13496: # A10. Or in INEX. #
13497: # If INEX is set, round error occurred. This is #
13498: # compensated for by 'or-ing' in the INEX2 flag to #
13499: # the lsb of Y. #
13500: # #
13501: # A11. Restore original FPCR; set size ext. #
13502: # Perform FINT operation in the user's rounding mode. #
13503: # Keep the size to extended. #
13504: # #
13505: # A12. Calculate YINT = FINT(Y) according to user's rounding #
13506: # mode. The FPSP routine sintd0 is used. The output #
13507: # is in fp0. #
13508: # #
13509: # A13. Check for LEN digits. #
13510: # If the int operation results in more than LEN digits, #
13511: # or less than LEN -1 digits, adjust ILOG and repeat from #
13512: # A6. This test occurs only on the first pass. If the #
13513: # result is exactly 10^LEN, decrement ILOG and divide #
13514: # the mantissa by 10. #
13515: # #
13516: # A14. Convert the mantissa to bcd. #
13517: # The binstr routine is used to convert the LEN digit #
13518: # mantissa to bcd in memory. The input to binstr is #
13519: # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13520: # such that the decimal point is to the left of bit 63. #
13521: # The bcd digits are stored in the correct position in #
13522: # the final string area in memory. #
13523: # #
13524: # A15. Convert the exponent to bcd. #
13525: # As in A14 above, the exp is converted to bcd and the #
13526: # digits are stored in the final string. #
13527: # Test the length of the final exponent string. If the #
13528: # length is 4, set operr. #
13529: # #
13530: # A16. Write sign bits to final string. #
13531: # #
13532: #########################################################################
13533:
13534: set BINDEC_FLG, EXC_TEMP # DENORM flag
13535:
13536: # Constants in extended precision
13537: # PLOG2/PLOG2UP1 hold the same mantissa differing only in the last
13538: # hex digit (…798 vs …799): one value rounded down, one rounded up.
13539: # Used in A3 to scale (e + 0.f) into a log10 estimate (ILOG);
13540: # presumably log10(2) ~= 0.30103 — TODO confirm against the 060SP notes.
13537: PLOG2:
13538: long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13539: PLOG2UP1:
13540: long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13541:
13542: # Constants in single precision
13543: # (each padded to a 16-byte slot; only the first long is significant)
13543: FONE:
13544: long 0x3F800000,0x00000000,0x00000000,0x00000000
13545: FTWO:
13546: long 0x40000000,0x00000000,0x00000000,0x00000000
13547: FTEN:
13548: long 0x41200000,0x00000000,0x00000000,0x00000000
13549: F4933:
13550: long 0x459A2800,0x00000000,0x00000000,0x00000000
13551:
13552: # Rounding-mode select table for A7: indexed by
13553: # {initial FPCR rmode, LAMBDA, sign(X)} packed into d1 (see table in
13554: # the A7 step comment); each byte is the FPCR[6:5] mode to use.
13552: RBDTBL:
13553: byte 0,0,0,0
13554: byte 3,3,2,2
13555: byte 3,2,2,3
13556: byte 2,3,3,2
13557:
13558: # Implementation Notes:
13559: #
13560: # The registers are used as follows:
13561: #
13562: # d0: scratch; LEN input to binstr
13563: # d1: scratch
13564: # d2: upper 32-bits of mantissa for binstr
13565: # d3: scratch;lower 32-bits of mantissa for binstr
13566: # d4: LEN
13567: # d5: LAMBDA/ICTR
13568: # d6: ILOG
13569: # d7: k-factor
13570: # a0: ptr for original operand/final result
13571: # a1: scratch pointer
13572: # a2: pointer to FP_X; abs(original value) in ext
13573: # fp0: scratch
13574: # fp1: scratch
13575: # fp2: scratch
13576: # F_SCR1:
13577: # F_SCR2:
13578: # L_SCR1:
13579: # L_SCR2:
13580:
13581: global bindec
13582: bindec:
13583: movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13584: fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13585:
13586: # A1. Set RM and size ext. Set SIGMA = sign input;
13587: # The k-factor is saved for use in d7. Clear BINDEC_FLG for
13588: # separating normalized/denormalized input. If the input
13589: # is a denormalized number, set the BINDEC_FLG memory word
13590: # to signal denorm. If the input is unnormalized, normalize
13591: # the input and test for denormalized result.
13592: #
13593: fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13594: mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
13595: mov.l %d0,%d7 # move k-factor to d7
13596:
13597: clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13598: cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13599: bne.w A2_str # no; input is a NORM
13600:
13601: #
13602: # Normalize the denorm: shift the 64-bit mantissa (d1:d2) left,
13603: # decrementing the exponent in d0, until the msb of d1 is set.
13604: #
13604: un_de_norm:
13605: mov.w (%a0),%d0
13606: and.w &0x7fff,%d0 # strip sign of normalized exp
13607: mov.l 4(%a0),%d1
13608: mov.l 8(%a0),%d2
13609: norm_loop:
13610: sub.w &1,%d0
13611: lsl.l &1,%d2
13612: roxl.l &1,%d1 # carry out of d2 shifts into d1
13613: tst.l %d1
13614: bge.b norm_loop # loop until bit 31 of d1 is set
13615: #
13616: # Test if the normalized input is denormalized
13617: #
13618: tst.w %d0
13619: bgt.b pos_exp # if greater than zero, it is a norm
13620: st BINDEC_FLG(%a6) # set flag for denorm
13621: pos_exp:
13622: and.w &0x7fff,%d0 # strip sign of normalized exp
13623: mov.w %d0,(%a0) # write normalized value back
13624: mov.l %d1,4(%a0)
13625: mov.l %d2,8(%a0)
13626:
13627: # A2. Set X = abs(input).
13628: #
13629: A2_str:
13630: mov.l (%a0),FP_SCR1(%a6) # move input to work space
13631: mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
13632: mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
13633: and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X) by clearing sign bit
13634:
13635: # A3. Compute ILOG.
13636: # ILOG is the log base 10 of the input value. It is approx-
13637: # imated by adding e + 0.f when the original value is viewed
13638: # as 2^^e * 1.f in extended precision. This value is stored
13639: # in d6.
13640: #
13641: # Register usage:
13642: # Input/Output
13643: # d0: k-factor/exponent
13644: # d2: x/x
13645: # d3: x/x
13646: # d4: x/x
13647: # d5: x/x
13648: # d6: x/ILOG
13649: # d7: k-factor/Unchanged
13650: # a0: ptr for original operand/final result
13651: # a1: x/x
13652: # a2: x/x
13653: # fp0: x/float(ILOG)
13654: # fp1: x/x
13655: # fp2: x/x
13656: # F_SCR1:x/x
13657: # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13658: # L_SCR1:x/x
13659: # L_SCR2:first word of X packed/Unchanged
13660:
13661: tst.b BINDEC_FLG(%a6) # check for denorm
13662: beq.b A3_cont # if clr, continue with norm
13663: mov.l &-4933,%d6 # force ILOG = -4933 (min pkd exp magnitude)
13664: bra.b A4_str
13665: A3_cont:
13666: mov.w FP_SCR1(%a6),%d0 # move exp to d0
13667: mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13668: fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13669: sub.w &0x3fff,%d0 # strip off bias
13670: fadd.w %d0,%fp0 # add in exp; fp0 = e + 1.f
13671: fsub.s FONE(%pc),%fp0 # subtract off 1.0; fp0 = e + 0.f
13672: fbge.w pos_res # if pos, branch
13673: fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1 (rounded up)
13674: fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13675: bra.b A4_str # go move out ILOG
13676: pos_res:
13677: fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2 (rounded down)
13678: fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13679:
13680:
13681: # A4. Clr INEX bit.
13682: # The operation in A3 above may have set INEX2.
13683:
13684: A4_str:
13685: fmov.l &0,%fpsr # zero all of fpsr - nothing needed
13686:
13687:
13688: # A5. Set ICTR = 0;
13689: # ICTR is a flag used in A13. It must be set before the
13690: # loop entry A6. The lower word of d5 is used for ICTR.
13691:
13692: clr.w %d5 # clear ICTR
13693:
13694: # A6. Calculate LEN.
13695: # LEN is the number of digits to be displayed. The k-factor
13696: # can dictate either the total number of digits, if it is
13697: # a positive number, or the number of digits after the
13698: # original decimal point which are to be included as
13699: # significant. See the 68882 manual for examples.
13700: # If LEN is computed to be greater than 17, set OPERR in
13701: # USER_FPSR. LEN is stored in d4.
13702: #
13703: # Register usage:
13704: # Input/Output
13705: # d0: exponent/Unchanged
13706: # d2: x/x/scratch
13707: # d3: x/x
13708: # d4: exc picture/LEN
13709: # d5: ICTR/Unchanged
13710: # d6: ILOG/Unchanged
13711: # d7: k-factor/Unchanged
13712: # a0: ptr for original operand/final result
13713: # a1: x/x
13714: # a2: x/x
13715: # fp0: float(ILOG)/Unchanged
13716: # fp1: x/x
13717: # fp2: x/x
13718: # F_SCR1:x/x
13719: # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13720: # L_SCR1:x/x
13721: # L_SCR2:first word of X packed/Unchanged
13722:
13723: A6_str:
13724: tst.l %d7 # branch on sign of k
13725: ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13726: mov.l %d7,%d4 # if k > 0, LEN = k
13727: bra.b len_ck # skip to LEN check
13728: k_neg:
13729: mov.l %d6,%d4 # first load ILOG to d4
13730: sub.l %d7,%d4 # subtract off k
13731: addq.l &1,%d4 # add in the 1
13732: len_ck:
13733: tst.l %d4 # LEN check: branch on sign of LEN
13734: ble.b LEN_ng # if neg, set LEN = 1
13735: cmp.l %d4,&17 # test if LEN > 17
13736: ble.b A7_str # if not, forget it
13737: mov.l &17,%d4 # set max LEN = 17
13738: tst.l %d7 # if negative, never set OPERR
13739: ble.b A7_str # if positive, continue
13740: or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13741: bra.b A7_str # finished here
13742: LEN_ng:
13743: mov.l &1,%d4 # min LEN is 1
13744:
13745:
13746: # A7. Calculate SCALE.
13747: # SCALE is equal to 10^ISCALE, where ISCALE is the number
13748: # of decimal places needed to insure LEN integer digits
13749: # in the output before conversion to bcd. LAMBDA is the sign
13750: # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13751: # the rounding mode as given in the following table (see
13752: # Coonen, p. 7.23 as ref.; however, the SCALE variable is
13753: # of opposite sign in bindec.sa from Coonen).
13754: #
13755: # Initial USE
13756: # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13757: # ----------------------------------------------
13758: # RN 00 0 0 00/0 RN
13759: # RN 00 0 1 00/0 RN
13760: # RN 00 1 0 00/0 RN
13761: # RN 00 1 1 00/0 RN
13762: # RZ 01 0 0 11/3 RP
13763: # RZ 01 0 1 11/3 RP
13764: # RZ 01 1 0 10/2 RM
13765: # RZ 01 1 1 10/2 RM
13766: # RM 10 0 0 11/3 RP
13767: # RM 10 0 1 10/2 RM
13768: # RM 10 1 0 10/2 RM
13769: # RM 10 1 1 11/3 RP
13770: # RP 11 0 0 10/2 RM
13771: # RP 11 0 1 11/3 RP
13772: # RP 11 1 0 11/3 RP
13773: # RP 11 1 1 10/2 RM
13774: #
13775: # Register usage:
13776: # Input/Output
13777: # d0: exponent/scratch - final is 0
13778: # d2: x/0 or 24 for A9
13779: # d3: x/scratch - offset ptr into PTENRM array
13780: # d4: LEN/Unchanged
13781: # d5: 0/ICTR:LAMBDA
13782: # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13783: # d7: k-factor/Unchanged
13784: # a0: ptr for original operand/final result
13785: # a1: x/ptr to PTENRM array
13786: # a2: x/x
13787: # fp0: float(ILOG)/Unchanged
13788: # fp1: x/10^ISCALE
13789: # fp2: x/x
13790: # F_SCR1:x/x
13791: # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13792: # L_SCR1:x/x
13793: # L_SCR2:first word of X packed/Unchanged
13794:
13795: A7_str:
13796: tst.l %d7 # test sign of k
13797: bgt.b k_pos # if pos and > 0, skip this
13798: cmp.l %d7,%d6 # test k - ILOG
13799: blt.b k_pos # if ILOG >= k, skip this
13800: mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
13801: k_pos:
13802: mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0 (= ISCALE)
13803: addq.l &1,%d0 # add the 1
13804: sub.l %d4,%d0 # sub off LEN
13805: swap %d5 # use upper word of d5 for LAMBDA
13806: clr.w %d5 # set it zero initially
13807: clr.w %d2 # set up d2 for very small case
13808: tst.l %d0 # test sign of ISCALE
13809: bge.b iscale # if pos, skip next inst
13810: addq.w &1,%d5 # if neg, set LAMBDA true
13811: cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13812: bgt.b no_inf # if false, skip rest
13813: add.l &24,%d0 # add in 24 to iscale (10^24 applied in A9)
13814: mov.l &24,%d2 # put 24 in d2 for A9
13815: no_inf:
13816: neg.l %d0 # and take abs of ISCALE
13817: iscale:
13818: # Select the rounding mode/table via RBDTBL, index built from
13819: # {initial FPCR rmode, LAMBDA, sign(X)} in bits 3:1:0 of d1.
13818: fmov.s FONE(%pc),%fp1 # init fp1 to 1
13819: bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13820: lsl.w &1,%d1 # put them in bits 2:1
13821: add.w %d5,%d1 # add in LAMBDA
13822: lsl.w &1,%d1 # put them in bits 3:1
13823: tst.l L_SCR2(%a6) # test sign of original x
13824: bge.b x_pos # if pos, don't set bit 0
13825: addq.l &1,%d1 # if neg, set bit 0
13826: x_pos:
13827: lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13828: mov.b (%a2,%d1),%d3 # load d3 with new rmode
13829: lsl.l &4,%d3 # put bits in proper position
13830: fmov.l %d3,%fpcr # load bits into fpu
13831: lsr.l &4,%d3 # put bits in proper position
13832: tst.b %d3 # decode new rmode for pten table
13833: bne.b not_rn # if zero, it is RN
13834: lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13835: bra.b rmode # exit decode
13836: not_rn:
13837: lsr.b &1,%d3 # get lsb in carry
13838: bcc.b not_rp2 # if carry clear, it is RM
13839: lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13840: bra.b rmode # exit decode
13841: not_rp2:
13842: lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13843: rmode:
13844: # Binary-exponentiation loop: fp1 = 10^abs(ISCALE), using the
13845: # 12-byte-entry power-of-ten table at (a1); one entry per bit of d0.
13844: clr.l %d3 # clr table index
13845: e_loop2:
13846: lsr.l &1,%d0 # shift next bit into carry
13847: bcc.b e_next2 # if zero, skip the mul
13848: fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13849: e_next2:
13850: add.l &12,%d3 # inc d3 to next pwrten table entry
13851: tst.l %d0 # test if ISCALE is zero
13852: bne.b e_loop2 # if not, loop
13853:
13854: # A8. Clr INEX; Force RZ.
13855: # The operation in A3 above may have set INEX2.
13856: # RZ mode is forced for the scaling operation to insure
13857: # only one rounding error. The grs bits are collected in
13858: # the INEX flag for use in A10.
13859: #
13860: # Register usage:
13861: # Input/Output
13862:
13863: fmov.l &0,%fpsr # clr INEX
13864: fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13865:
13866: # A9. Scale X -> Y.
13867: # The mantissa is scaled to the desired number of significant
13868: # digits. The excess digits are collected in INEX2. If mul,
13869: # Check d2 for excess 10 exponential value. If not zero,
13870: # the iscale value would have caused the pwrten calculation
13871: # to overflow. Only a negative iscale can cause this, so
13872: # multiply by 10^(d2), which is now only allowed to be 24,
13873: # with a multiply by 10^8 and 10^16, which is exact since
13874: # 10^24 is exact. If the input was denormalized, we must
13875: # create a busy stack frame with the mul command and the
13876: # two operands, and allow the fpu to complete the multiply.
13877: #
13878: # Register usage:
13879: # Input/Output
13880: # d0: FPCR with RZ mode/Unchanged
13881: # d2: 0 or 24/unchanged
13882: # d3: x/x
13883: # d4: LEN/Unchanged
13884: # d5: ICTR:LAMBDA
13885: # d6: ILOG/Unchanged
13886: # d7: k-factor/Unchanged
13887: # a0: ptr for original operand/final result
13888: # a1: ptr to PTENRM array/Unchanged
13889: # a2: x/x
13890: # fp0: float(ILOG)/X adjusted for SCALE (Y)
13891: # fp1: 10^ISCALE/Unchanged
13892: # fp2: x/x
13893: # F_SCR1:x/x
13894: # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13895: # L_SCR1:x/x
13896: # L_SCR2:first word of X packed/Unchanged
13897:
13898: A9_str:
13899: fmov.x (%a0),%fp0 # load X from memory
13900: fabs.x %fp0 # use abs(X)
13901: tst.w %d5 # LAMBDA is in lower word of d5
13902: bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13903: fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13904: bra.w A10_st # branch to A10
13905:
13906: sc_mul:
13907: tst.b BINDEC_FLG(%a6) # check for denorm
13908: beq.w A9_norm # if norm, continue with mul
13909:
13910: # for DENORM, we must calculate:
13911: # fp0 = input_op * 10^ISCALE * 10^24
13912: # since the input operand is a DENORM, we can't multiply it directly.
13913: # so, we do the multiplication of the exponents and mantissas separately.
13914: # in this way, we avoid underflow on intermediate stages of the
13915: # multiplication and guarantee a result without exception.
13916: fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13917:
13918: # Accumulate the product exponent in d3: exp(10^ISCALE) + exp(DENORM)
13919: # + exp(10^8) + exp(10^16), removing the 0x3fff bias at each add.
13918: mov.w (%sp),%d3 # grab exponent
13919: andi.w &0x7fff,%d3 # clear sign
13920: ori.w &0x8000,(%a0) # make DENORM exp negative
13921: add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13922: subi.w &0x3fff,%d3 # subtract BIAS
13923: add.w 36(%a1),%d3 # add in exp of 10^8 table entry
13924: subi.w &0x3fff,%d3 # subtract BIAS
13925: add.w 48(%a1),%d3 # add in exp of 10^16 table entry
13926: subi.w &0x3fff,%d3 # subtract BIAS
13927:
13928: bmi.w sc_mul_err # if result is DENORM, punt!!!
13929:
13930: andi.w &0x8000,(%sp) # keep sign
13931: or.w %d3,(%sp) # insert new exponent
13932: andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13933: mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13934: mov.l 0x4(%a0),-(%sp)
13935: mov.l &0x3fff0000,-(%sp) # force exp to zero
13936: fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13937: fmul.x (%sp)+,%fp0 # mul by 10^ISCALE mantissa w/ new exp
13938:
13939: # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13940: # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13941: mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13942: mov.l 36+4(%a1),-(%sp)
13943: mov.l &0x3fff0000,-(%sp) # force exp to zero
13944: mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13945: mov.l 48+4(%a1),-(%sp)
13946: mov.l &0x3fff0000,-(%sp)# force exp to zero
13947: fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13948: fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13949: bra.b A10_st
13950:
13951: # NOTE(review): deliberate infinite loop ("punt") — this path hangs
13952: # rather than produce a wrong result; confirm this is intended.
13951: sc_mul_err:
13952: bra.b sc_mul_err
13953:
13954: A9_norm:
13955: tst.w %d2 # test for small exp case
13956: beq.b A9_con # if zero, continue as normal
13957: fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13958: fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13959: A9_con:
13960: fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13961:
13962: # A10. Or in INEX.
13963: # If INEX is set, round error occurred. This is compensated
13964: # for by 'or-ing' in the INEX2 flag to the lsb of Y.
13965: #
13966: # Register usage:
13967: # Input/Output
13968: # d0: FPCR with RZ mode/FPSR with INEX2 isolated
13969: # d2: x/x
13970: # d3: x/x
13971: # d4: LEN/Unchanged
13972: # d5: ICTR:LAMBDA
13973: # d6: ILOG/Unchanged
13974: # d7: k-factor/Unchanged
13975: # a0: ptr for original operand/final result
13976: # a1: ptr to PTENxx array/Unchanged
13977: # a2: x/ptr to FP_SCR1(a6)
13978: # fp0: Y/Y with lsb adjusted
13979: # fp1: 10^ISCALE/Unchanged
13980: # fp2: x/x
13981:
13982: A10_st:
13983: fmov.l %fpsr,%d0 # get FPSR
13984: fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13985: lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13986: btst &9,%d0 # check if INEX2 set
13987: beq.b A11_st # if clear, skip rest
13988: or.l &1,8(%a2) # or in 1 to lsb of mantissa
13989: fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13990:
13991:
13992: # A11. Restore original FPCR; set size ext.
13993: # Perform FINT operation in the user's rounding mode. Keep
13994: # the size to extended. The sintdo entry point in the sint
13995: # routine expects the FPCR value to be in USER_FPCR for
13996: # mode and precision. The original FPCR is saved in L_SCR1.
13997:
13998: A11_st:
13999: mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
14000: and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
14001: # ;block exceptions
14001: # ;block exceptions
14002:
14003:
14004: # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
14005: # The FPSP routine sintd0 is used. The output is in fp0.
14006: #
14007: # Register usage:
14008: # Input/Output
14009: # d0: FPSR with AINEX cleared/FPCR with size set to ext
14010: # d2: x/x/scratch
14011: # d3: x/x
14012: # d4: LEN/Unchanged
14013: # d5: ICTR:LAMBDA/Unchanged
14014: # d6: ILOG/Unchanged
14015: # d7: k-factor/Unchanged
14016: # a0: ptr for original operand/src ptr for sintdo
14017: # a1: ptr to PTENxx array/Unchanged
14018: # a2: ptr to FP_SCR1(a6)/Unchanged
14019: # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14020: # fp0: Y/YINT
14021: # fp1: 10^ISCALE/Unchanged
14022: # fp2: x/x
14023: # F_SCR1:x/x
14024: # F_SCR2:Y adjusted for inex/Y with original exponent
14025: # L_SCR1:x/original USER_FPCR
14026: # L_SCR2:first word of X packed/Unchanged
14027:
14028: A12_st:
14029: movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
14030: mov.l L_SCR1(%a6),-(%sp)
14031: mov.l L_SCR2(%a6),-(%sp)
14032:
14033: lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14034: fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14035: tst.l L_SCR2(%a6) # test sign of original operand
14036: bge.b do_fint12 # if pos, use Y
14037: or.l &0x80000000,(%a0) # if neg, use -Y
14038: do_fint12:
14039: mov.l USER_FPSR(%a6),-(%sp) # save FPSR byte across fint
14040: # bsr sintdo # sint routine returns int in fp0
14041:
14042: # 060 version: do the fint directly in the user's mode/precision
14043: # rather than calling the FPSP sintdo routine (left commented out).
14042: fmov.l USER_FPCR(%a6),%fpcr
14043: fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14044: ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14045: ## andi.l &0x00000030,%d0
14046: ## fmov.l %d0,%fpcr
14047: fint.x FP_SCR1(%a6),%fp0 # do fint()
14048: fmov.l %fpsr,%d0
14049: or.w %d0,FPSR_EXCEPT(%a6) # accrue any new exception bits
14050: ## fmov.l &0x0,%fpcr
14051: ## fmov.l %fpsr,%d0 # don't keep ccodes
14052: ## or.w %d0,FPSR_EXCEPT(%a6)
14053:
14054: mov.b (%sp),USER_FPSR(%a6) # restore saved FPSR byte
14055: add.l &4,%sp
14056:
14057: mov.l (%sp)+,L_SCR2(%a6)
14058: mov.l (%sp)+,L_SCR1(%a6)
14059: movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14060:
14061: mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14062: mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14063:
14064: # A13. Check for LEN digits.
14065: # If the int operation results in more than LEN digits,
14066: # or less than LEN -1 digits, adjust ILOG and repeat from
14067: # A6. This test occurs only on the first pass. If the
14068: # result is exactly 10^LEN, decrement ILOG and divide
14069: # the mantissa by 10. The calculation of 10^LEN cannot
14070: #		be inexact, since all powers of ten up to 10^27 are exact
14071: # in extended precision, so the use of a previous power-of-ten
14072: # table will introduce no error.
14073: #
14074: #
14075: # Register usage:
14076: # Input/Output
14077: # d0: FPCR with size set to ext/scratch final = 0
14078: # d2: x/x
14079: # d3: x/scratch final = x
14080: # d4: LEN/LEN adjusted
14081: # d5: ICTR:LAMBDA/LAMBDA:ICTR
14082: # d6: ILOG/ILOG adjusted
14083: # d7: k-factor/Unchanged
14084: # a0: pointer into memory for packed bcd string formation
14085: # a1: ptr to PTENxx array/Unchanged
14086: # a2: ptr to FP_SCR1(a6)/Unchanged
14087: # fp0: int portion of Y/abs(YINT) adjusted
14088: # fp1: 10^ISCALE/Unchanged
14089: # fp2: x/10^LEN
14090: # F_SCR1:x/x
14091: # F_SCR2:Y with original exponent/Unchanged
14092: # L_SCR1:original USER_FPCR/Unchanged
14093: # L_SCR2:first word of X packed/Unchanged
14094:
14095: A13_st:
14096: swap %d5 # put ICTR in lower word of d5
14097: tst.w %d5 # check if ICTR = 0
14098: bne not_zr # if non-zero, go to second test
14099: #
14100: # Compute 10^(LEN-1) in fp2 by binary exponentiation over the
14101: # power-of-ten table at (a1) (12 bytes per entry).
14101: #
14102: fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14103: mov.l %d4,%d0 # put LEN in d0
14104: subq.l &1,%d0 # d0 = LEN -1
14105: clr.l %d3 # clr table index
14106: l_loop:
14107: lsr.l &1,%d0 # shift next bit into carry
14108: bcc.b l_next # if zero, skip the mul
14109: fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14110: l_next:
14111: add.l &12,%d3 # inc d3 to next pwrten table entry
14112: tst.l %d0 # test if LEN is zero
14113: bne.b l_loop # if not, loop
14114: #
14115: # 10^(LEN-1) is computed for this test and A14. If the input was
14116: # denormalized, check only the case in which YINT > 10^LEN.
14117: #
14118: tst.b BINDEC_FLG(%a6) # check if input was norm
14119: beq.b A13_con # if norm, continue with checking
14120: fabs.x %fp0 # take abs of YINT
14121: bra test_2
14122: #
14123: # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14124: #
14125: A13_con:
14126: fabs.x %fp0 # take abs of YINT
14127: fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14128: fbge.w test_2 # if greater, do next test
14129: subq.l &1,%d6 # subtract 1 from ILOG
14130: mov.w &1,%d5 # set ICTR
14131: fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14132: fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14133: bra.w A6_str # return to A6 and recompute YINT
14134: test_2:
14135: fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14136: fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14137: fblt.w A14_st # if less, all is ok, go to A14
14138: fbgt.w fix_ex # if greater, fix and redo
14139: fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14140: addq.l &1,%d6 # and inc ILOG
14141: bra.b A14_st # and continue elsewhere
14142: fix_ex:
14143: addq.l &1,%d6 # increment ILOG by 1
14144: mov.w &1,%d5 # set ICTR
14145: fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14146: bra.w A6_str # return to A6 and recompute YINT
14147: #
14148: # Since ICTR <> 0, we have already been through one adjustment,
14149: # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14150: # 10^LEN is again computed using whatever table is in a1 since the
14151: # value calculated cannot be inexact.
14152: #
14153: not_zr:
14154: fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14155: mov.l %d4,%d0 # put LEN in d0
14156: clr.l %d3 # clr table index
14157: z_loop:
14158: lsr.l &1,%d0 # shift next bit into carry
14159: bcc.b z_next # if zero, skip the mul
14160: fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14161: z_next:
14162: add.l &12,%d3 # inc d3 to next pwrten table entry
14163: tst.l %d0 # test if LEN is zero
14164: bne.b z_loop # if not, loop
14165: fabs.x %fp0 # get abs(YINT)
14166: fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14167: fbneq.w A14_st # if not, skip this
14168: fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14169: addq.l &1,%d6 # and inc ILOG by 1
14170: addq.l &1,%d4 # and inc LEN
14171: fmul.s FTEN(%pc),%fp2 # if LEN++, then get 10^^LEN
14172:
14173: # A14. Convert the mantissa to bcd.
14174: # The binstr routine is used to convert the LEN digit
14175: # mantissa to bcd in memory. The input to binstr is
14176: # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14177: # such that the decimal point is to the left of bit 63.
14178: # The bcd digits are stored in the correct position in
14179: # the final string area in memory.
14180: #
14181: #
14182: # Register usage:
14183: # Input/Output
14184: # d0: x/LEN call to binstr - final is 0
14185: # d1: x/0
14186: # d2: x/ms 32-bits of mant of abs(YINT)
14187: # d3: x/ls 32-bits of mant of abs(YINT)
14188: # d4: LEN/Unchanged
14189: # d5: ICTR:LAMBDA/LAMBDA:ICTR
14190: # d6: ILOG
14191: # d7: k-factor/Unchanged
14192: # a0: pointer into memory for packed bcd string formation
14193: # /ptr to first mantissa byte in result string
14194: # a1: ptr to PTENxx array/Unchanged
14195: # a2: ptr to FP_SCR1(a6)/Unchanged
14196: # fp0: int portion of Y/abs(YINT) adjusted
14197: # fp1: 10^ISCALE/Unchanged
14198: # fp2: 10^LEN/Unchanged
14199: # F_SCR1:x/Work area for final result
14200: # F_SCR2:Y with original exponent/Unchanged
14201: # L_SCR1:original USER_FPCR/Unchanged
14202: # L_SCR2:first word of X packed/Unchanged
14203:
14204: A14_st:
14205: fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14206: fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14207: lea.l FP_SCR0(%a6),%a0
14208: fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14209: mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14210: mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14211: clr.l 4(%a0) # zero word 2 of FP_RES
14212: clr.l 8(%a0) # zero word 3 of FP_RES
14213: mov.l (%a0),%d0 # move exponent to d0
14214: swap %d0 # put exponent in lower word
14215: beq.b no_sft # if zero, don't shift
14216: sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14217: tst.l %d0 # check if > 1
14218: bgt.b no_sft # if so, don't shift
14219: neg.l %d0 # make exp positive
14220: m_loop:
14221: lsr.l &1,%d2 # shift d2:d3 right, add 0s
14222: roxr.l &1,%d3 # the number of places
14223: dbf.w %d0,m_loop # given in d0
14224: no_sft:
14225: tst.l %d2 # check for mantissa of zero
14226: bne.b no_zr # if not, go on
14227: tst.l %d3 # continue zero check
14228: beq.b zer_m # if zero, go directly to binstr
14229: no_zr:
14230: # Round the 64-bit fraction d2:d3 by adding 1 at bit 7, then mask
14231: # off the guard bits below bit 7 (not used by the 882 format).
14230: clr.l %d1 # put zero in d1 for addx
14231: add.l &0x00000080,%d3 # inc at bit 7
14232: addx.l %d1,%d2 # continue inc
14233: and.l &0xffffff80,%d3 # strip off lsb not used by 882
14234: zer_m:
14235: mov.l %d4,%d0 # put LEN in d0 for binstr call
14236: addq.l &3,%a0 # a0 points to M16 byte in result
14237: bsr binstr # call binstr to convert mant
14238:
14239:
14240: # A15. Convert the exponent to bcd.
14241: # As in A14 above, the exp is converted to bcd and the
14242: # digits are stored in the final string.
14243: #
14244: # Digits are stored in L_SCR1(a6) on return from BINDEC as:
14245: #
14246: # 32 16 15 0
14247: # -----------------------------------------
14248: # | 0 | e3 | e2 | e1 | e4 | X | X | X |
14249: # -----------------------------------------
14250: #
14251: # And are moved into their proper places in FP_SCR0. If digit e4
14252: # is non-zero, OPERR is signaled. In all cases, all 4 digits are
14253: # written as specified in the 881/882 manual for packed decimal.
14254: #
14255: # Register usage:
14256: # Input/Output
14257: # d0: x/LEN call to binstr - final is 0
14258: # d1: x/scratch (0);shift count for final exponent packing
14259: # d2: x/ms 32-bits of exp fraction/scratch
14260: # d3: x/ls 32-bits of exp fraction
14261: # d4: LEN/Unchanged
14262: # d5: ICTR:LAMBDA/LAMBDA:ICTR
14263: # d6: ILOG
14264: # d7: k-factor/Unchanged
14265: # a0: ptr to result string/ptr to L_SCR1(a6)
14266: # a1: ptr to PTENxx array/Unchanged
14267: # a2: ptr to FP_SCR1(a6)/Unchanged
14268: # fp0: abs(YINT) adjusted/float(ILOG)
14269: # fp1: 10^ISCALE/Unchanged
14270: # fp2: 10^LEN/Unchanged
14271: # F_SCR1:Work area for final result/BCD result
14272: # F_SCR2:Y with original exponent/ILOG/10^4
14273: # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14274: # L_SCR2:first word of X packed/Unchanged
14275:
14276: A15_st:
14277: tst.b BINDEC_FLG(%a6) # check for denorm
14278: beq.b not_denorm
14279: ftest.x %fp0 # test for zero
14280: fbeq.w den_zero # if zero, use k-factor or 4933
14281: fmov.l %d6,%fp0 # float ILOG
14282: fabs.x %fp0 # get abs of ILOG
14283: bra.b convrt
14284: den_zero:
14285: tst.l %d7 # check sign of the k-factor
14286: blt.b use_ilog # if negative, use ILOG
14287: fmov.s F4933(%pc),%fp0 # force exponent to 4933
14288: bra.b convrt # do it
14289: use_ilog:
14290: fmov.l %d6,%fp0 # float ILOG
14291: fabs.x %fp0 # get abs of ILOG
14292: bra.b convrt
14293: not_denorm:
14294: ftest.x %fp0 # test for zero
14295: fbneq.w not_zero # if zero, force exponent
14296: fmov.s FONE(%pc),%fp0 # force exponent to 1
14297: bra.b convrt # do it
14298: not_zero:
14299: fmov.l %d6,%fp0 # float ILOG
14300: fabs.x %fp0 # get abs of ILOG
14301: convrt:
14302: fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14303: fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14304: mov.l 4(%a2),%d2 # move word 2 to d2
14305: mov.l 8(%a2),%d3 # move word 3 to d3
14306: mov.w (%a2),%d0 # move exp to d0
14307: beq.b x_loop_fin # if zero, skip the shift
14308: sub.w &0x3ffd,%d0 # subtract off bias
14309: neg.w %d0 # make exp positive
14310: x_loop:
14311: lsr.l &1,%d2 # shift d2:d3 right
14312: roxr.l &1,%d3 # the number of places
14313: dbf.w %d0,x_loop # given in d0
14314: x_loop_fin:
14315: clr.l %d1 # put zero in d1 for addx
14316: add.l &0x00000080,%d3 # inc at bit 7 (same rounding as A14)
14317: addx.l %d1,%d2 # continue inc
14318: and.l &0xffffff80,%d3 # strip off lsb not used by 882
14319: mov.l &4,%d0 # put 4 in d0 for binstr call
14320: lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14321: bsr binstr # call binstr to convert exp
14322: mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14323: mov.l &12,%d1 # use d1 for shift count
14324: lsr.l %d1,%d0 # shift d0 right by 12
14325: bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14326: lsr.l %d1,%d0 # shift d0 right by 12
14327: bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14328: tst.b %d0 # check if e4 is zero
14329: beq.b A16_st # if zero, skip rest
14330: or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14331:
14332:
14333: # A16. Write sign bits to final string.
14334: # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14335: #
14336: # Register usage:
14337: # Input/Output
14338: # d0: x/scratch - final is x
14339: # d2: x/x
14340: # d3: x/x
14341: # d4: LEN/Unchanged
14342: # d5: ICTR:LAMBDA/LAMBDA:ICTR
14343: # d6: ILOG/ILOG adjusted
14344: # d7: k-factor/Unchanged
14345: # a0: ptr to L_SCR1(a6)/Unchanged
14346: # a1: ptr to PTENxx array/Unchanged
14347: # a2: ptr to FP_SCR1(a6)/Unchanged
14348: # fp0: float(ILOG)/Unchanged
14349: # fp1: 10^ISCALE/Unchanged
14350: # fp2: 10^LEN/Unchanged
14351: # F_SCR1:BCD result with correct signs
14352: # F_SCR2:ILOG/10^4
14353: # L_SCR1:Exponent digits on return from binstr
14354: # L_SCR2:first word of X packed/Unchanged
14355:
# A16. Collect the two sign bits in d0 (SM = bit 1 from the original
# mantissa in L_SCR2, SE = bit 0 from the sign of ILOG in d6) and insert
# them into the top two bits of the packed result in FP_SCR0; then clear
# stray FPSR status, restore the saved registers and return.
14356: A16_st:
14357: 	clr.l	%d0			# clr d0 for collection of signs
14358: 	and.b	&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
14359: 	tst.l	L_SCR2(%a6)		# check sign of original mantissa
14360: 	bge.b	mant_p			# if pos, don't set SM
14361: 	mov.l	&2,%d0			# move 2 in to d0 for SM
14362: mant_p:
14363: 	tst.l	%d6			# check sign of ILOG
14364: 	bge.b	wr_sgn			# if pos, don't set SE
14365: 	addq.l	&1,%d0			# set bit 0 in d0 for SE
14366: wr_sgn:
14367: 	bfins	%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
14368: 
14369: # Clean up and restore all registers used.
14370: 
14371: 	fmov.l	&0,%fpsr		# clear possible inex2/ainex bits
14372: 	fmovm.x	(%sp)+,&0xe0		# {%fp0-%fp2}
14373: 	movm.l	(%sp)+,&0x4fc		# {%d2-%d7/%a2}
14374: 	rts
14375:
# Table of 10^(2^n), n = 0..12, in extended precision, with the last
# mantissa lword rounded to nearest.  Indexed by the binary decomposition
# of the decimal scale factor.
14376: 	global	PTENRN
14377: PTENRN:
14378: 	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14379: 	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14380: 	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14381: 	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14382: 	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14383: 	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14384: 	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14385: 	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14386: 	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14387: 	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14388: 	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14389: 	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14390: 	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14391:
# Same powers-of-ten table as PTENRN, but with the inexact entries
# rounded toward plus infinity (last mantissa lword bumped up where the
# exact value is not representable).
14392: 	global	PTENRP
14393: PTENRP:
14394: 	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14395: 	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14396: 	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14397: 	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14398: 	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14399: 	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14400: 	long	0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
14401: 	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14402: 	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14403: 	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14404: 	long	0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
14405: 	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14406: 	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14407:
# Same powers-of-ten table as PTENRN, but with the inexact entries
# rounded toward minus infinity (last mantissa lword truncated down
# where the exact value is not representable).
14408: 	global	PTENRM
14409: PTENRM:
14410: 	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14411: 	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14412: 	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14413: 	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14414: 	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14415: 	long	0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
14416: 	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14417: 	long	0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
14418: 	long	0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
14419: 	long	0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
14420: 	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14421: 	long	0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
14422: 	long	0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
14423:
14424: #########################################################################
14425: # binstr(): Converts a 64-bit binary integer to bcd. #
14426: # #
14427: # INPUT *************************************************************** #
14428: # d2:d3 = 64-bit binary integer #
14429: # d0 = desired length (LEN) #
14430: # a0 = pointer to start in memory for bcd characters #
14431: # (This pointer must point to byte 4 of the first #
14432: # lword of the packed decimal memory string.) #
14433: # #
14434: # OUTPUT ************************************************************** #
14435: # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14436: # #
14437: # ALGORITHM *********************************************************** #
14438: # The 64-bit binary is assumed to have a decimal point before #
14439: # bit 63. The fraction is multiplied by 10 using a mul by 2 #
14440: # shift and a mul by 8 shift. The bits shifted out of the #
14441: # msb form a decimal digit. This process is iterated until #
14442: # LEN digits are formed. #
14443: # #
14444: # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14445: # digit formed will be assumed the least significant. This is #
14446: # to force the first byte formed to have a 0 in the upper 4 bits. #
14447: # #
14448: # A2. Beginning of the loop: #
14449: # Copy the fraction in d2:d3 to d4:d5. #
14450: # #
14451: # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14452: # extracts and shifts. The three msbs from d2 will go into d1. #
14453: # #
14454: # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14455: # will be collected by the carry. #
14456: # #
14457: # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14458: # into d2:d3. D1 will contain the bcd digit formed. #
14459: # #
14460: # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14461: # zero, it is the ls digit. Put the digit in its place in the #
14462: # upper word of d0. If it is the ls digit, write the word #
14463: # from d0 to memory. #
14464: # #
14465: #	A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
14466: # #
14467: #########################################################################
14468:
14469: # Implementation Notes:
14470: #
14471: # The registers are used as follows:
14472: #
14473: # d0: LEN counter
14474: # d1: temp used to form the digit
14475: # d2: upper 32-bits of fraction for mul by 8
14476: # d3: lower 32-bits of fraction for mul by 8
14477: # d4: upper 32-bits of fraction for mul by 2
14478: # d5: lower 32-bits of fraction for mul by 2
14479: # d6: temp for bit-field extracts
14480: # d7: byte digit formation word;digit count {0,1}
14481: # a0: pointer into memory for packed bcd string formation
14482: #
14483:
# binstr(): see block header above.  In: d2:d3 = 64-bit binary fraction,
# d0 = LEN (number of bcd digits wanted), a0 -> output byte string.
# Out: a0 points just past the LEN bcd digits written.  Exact instruction
# order is significant (ERRATA #13 NOPs below) -- do not reorder.
14484: 	global	binstr
14485: binstr:
14486: 	movm.l	&0xff00,-(%sp)		# {%d0-%d7}
14487: 
14488: #
14489: # A1: Init d7
14490: #
14491: 	mov.l	&1,%d7			# init d7 for second digit
14492: 	subq.l	&1,%d0			# for dbf d0 would have LEN+1 passes
14493: #
14494: # A2. Copy d2:d3 to d4:d5. Start loop.
14495: #
14496: loop:
14497: 	mov.l	%d2,%d4			# copy the fraction before muls
14498: 	mov.l	%d3,%d5			# to d4:d5
14499: #
14500: # A3. Multiply d2:d3 by 8; extract msbs into d1.
14501: #
14502: 	bfextu	%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
14503: 	asl.l	&3,%d2			# shift d2 left by 3 places
14504: 	bfextu	%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
14505: 	asl.l	&3,%d3			# shift d3 left by 3 places
14506: 	or.l	%d6,%d2			# or in msbs from d3 into d2
14507: #
14508: # A4. Multiply d4:d5 by 2; add carry out to d1.
14509: #
14510: 	asl.l	&1,%d5			# mul d5 by 2
14511: 	roxl.l	&1,%d4			# mul d4 by 2
14512: 	swap	%d6			# put 0 in d6 lower word
14513: 	addx.w	%d6,%d1			# add in extend from mul by 2
14514: #
14515: # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
14516: #
14517: 	add.l	%d5,%d3			# add lower 32 bits
14518: 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14519: 	addx.l	%d4,%d2			# add with extend upper 32 bits
14520: 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14521: 	addx.w	%d6,%d1			# add in extend from add to d1
14522: 	swap	%d6			# with d6 = 0; put 0 in upper word
14523: #
14524: # A6. Test d7 and branch.
14525: #
14526: 	tst.w	%d7			# d7a = 0: no digit pending, this is ms digit
14527: 	beq.b	first_d			# d7a = 1: digit pending, form byte & write
14528: sec_d:
14529: 	swap	%d7			# bring first digit to word d7b
14530: 	asl.w	&4,%d7			# first digit in upper 4 bits d7b
14531: 	add.w	%d1,%d7			# add in ls digit to d7b
14532: 	mov.b	%d7,(%a0)+		# store d7b byte in memory
14533: 	swap	%d7			# put LEN counter in word d7a
14534: 	clr.w	%d7			# set d7a to signal no digits done
14535: 	dbf.w	%d0,loop		# do loop some more!
14536: 	bra.b	end_bstr		# finished, so exit
14537: first_d:
14538: 	swap	%d7			# put digit word in d7b
14539: 	mov.w	%d1,%d7			# put new digit in d7b
14540: 	swap	%d7			# put LEN counter in word d7a
14541: 	addq.w	&1,%d7			# set d7a to signal first digit done
14542: 	dbf.w	%d0,loop		# do loop some more!
14543: 	swap	%d7			# put last digit in string
14544: 	lsl.w	&4,%d7			# move it to upper 4 bits
14545: 	mov.b	%d7,(%a0)+		# store it in memory string
14546: #
14547: # Clean up and return; the bcd string has been written through a0.
14548: #
14549: end_bstr:
14550: 	movm.l	(%sp)+,&0xff		# {%d0-%d7}
14551: 	rts
14552:
14553: #########################################################################
14554: # XDEF **************************************************************** #
14555: # facc_in_b(): dmem_read_byte failed #
14556: # facc_in_w(): dmem_read_word failed #
14557: # facc_in_l(): dmem_read_long failed #
14558: # facc_in_d(): dmem_read of dbl prec failed #
14559: # facc_in_x(): dmem_read of ext prec failed #
14560: # #
14561: # facc_out_b(): dmem_write_byte failed #
14562: # facc_out_w(): dmem_write_word failed #
14563: # facc_out_l(): dmem_write_long failed #
14564: # facc_out_d(): dmem_write of dbl prec failed #
14565: # facc_out_x(): dmem_write of ext prec failed #
14566: # #
14567: # XREF **************************************************************** #
14568: # _real_access() - exit through access error handler #
14569: # #
14570: # INPUT *************************************************************** #
14571: # None #
14572: # #
14573: # OUTPUT ************************************************************** #
14574: # None #
14575: # #
14576: # ALGORITHM *********************************************************** #
14577: # Flow jumps here when an FP data fetch call gets an error #
14578: # result. This means the operating system wants an access error frame #
14579: # made out of the current exception stack frame. #
14580: # So, we first call restore() which makes sure that any updated #
14581: # -(an)+ register gets returned to its pre-exception value and then #
14582: # we change the stack to an access error stack frame. #
14583: # #
14584: #########################################################################
14585:
# Data-memory READ failed.  Each entry loads the access size into d0,
# calls restore() to back out any (An)+/-(An) side effect, records the
# Fault Status Long Word for the access-error frame in EXC_VOFF, and
# joins facc_finish to build the frame.
14586: facc_in_b:
14587: 	movq.l	&0x1,%d0		# one byte
14588: 	bsr.w	restore			# fix An
14589: 
14590: 	mov.w	&0x0121,EXC_VOFF(%a6)	# set FSLW
14591: 	bra.w	facc_finish
14592: 
14593: facc_in_w:
14594: 	movq.l	&0x2,%d0		# two bytes
14595: 	bsr.w	restore			# fix An
14596: 
14597: 	mov.w	&0x0141,EXC_VOFF(%a6)	# set FSLW
14598: 	bra.b	facc_finish
14599: 
14600: facc_in_l:
14601: 	movq.l	&0x4,%d0		# four bytes
14602: 	bsr.w	restore			# fix An
14603: 
14604: 	mov.w	&0x0101,EXC_VOFF(%a6)	# set FSLW
14605: 	bra.b	facc_finish
14606: 
14607: facc_in_d:
14608: 	movq.l	&0x8,%d0		# eight bytes
14609: 	bsr.w	restore			# fix An
14610: 
14611: 	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW
14612: 	bra.b	facc_finish
14613: 
14614: facc_in_x:
14615: 	movq.l	&0xc,%d0		# twelve bytes
14616: 	bsr.w	restore			# fix An
14617: 
14618: 	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW (same as dbl prec)
14619: 	bra.b	facc_finish
14620:
14621: ################################################################
14622:
# Data-memory WRITE failed.  Same pattern as the facc_in_* handlers:
# d0 = access size, restore() undoes any An update, EXC_VOFF gets the
# write-fault FSLW, then fall into/branch to facc_finish.
14623: facc_out_b:
14624: 	movq.l	&0x1,%d0		# one byte
14625: 	bsr.w	restore			# restore An
14626: 
14627: 	mov.w	&0x00a1,EXC_VOFF(%a6)	# set FSLW
14628: 	bra.b	facc_finish
14629: 
14630: facc_out_w:
14631: 	movq.l	&0x2,%d0		# two bytes
14632: 	bsr.w	restore			# restore An
14633: 
14634: 	mov.w	&0x00c1,EXC_VOFF(%a6)	# set FSLW
14635: 	bra.b	facc_finish
14636: 
14637: facc_out_l:
14638: 	movq.l	&0x4,%d0		# four bytes
14639: 	bsr.w	restore			# restore An
14640: 
14641: 	mov.w	&0x0081,EXC_VOFF(%a6)	# set FSLW
14642: 	bra.b	facc_finish
14643: 
14644: facc_out_d:
14645: 	movq.l	&0x8,%d0		# eight bytes
14646: 	bsr.w	restore			# restore An
14647: 
14648: 	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW
14649: 	bra.b	facc_finish
14650:
# Extended-precision write failed; falls through into facc_finish below.
14651: facc_out_x:
14652: 	movq.l	&0xc,%d0		# twelve bytes (movq, consistent w/ siblings)
14653: 	bsr.w	restore			# restore An
14654: 
14655: 	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW (same as dbl prec)
14656:
# here's where we actually create the access error frame from the
# current exception stack frame.  The user-visible registers and FP
# control state are restored first, the a6 frame is unlinked, and then
# the four lwords on the stack are rewritten in place into the shape of
# an access-error frame (SR/PC, lo(PC), EA, FSLW) before exiting through
# the operating system's access-error handler.
14657: # here's where we actually create the access error frame from the
14658: # current exception stack frame.
14659: facc_finish:
14660: 	mov.l	USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14661: 
14662: 	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
14663: 	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14664: 	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
14665: 
14666: 	unlk	%a6
14667: 
14668: 	mov.l	(%sp),-(%sp)		# store SR, hi(PC)
14669: 	mov.l	0x8(%sp),0x4(%sp)	# store lo(PC)
14670: 	mov.l	0xc(%sp),0x8(%sp)	# store EA
14671: 	mov.l	&0x00000001,0xc(%sp)	# store FSLW
14672: 	mov.w	0x6(%sp),0xc(%sp)	# fix FSLW (size)
14673: 	mov.w	&0x4008,0x6(%sp)	# store voff
14674: 
14675: 	btst	&0x5,(%sp)		# supervisor or user mode?
14676: 	beq.b	facc_out2		# user
14677: 	bset	&0x2,0xd(%sp)		# set supervisor TM bit
14678: 
14679: facc_out2:
14680: 	bra.l	_real_access
14681:
14682: ##################################################################
14683:
14684: # if the effective addressing mode was predecrement or postincrement,
14685: # the emulation has already changed its value to the correct post-
14686: # instruction value. but since we're exiting to the access error
14687: # handler, then AN must be returned to its pre-instruction value.
14688: # we do that here.
#
# input: d0 = size of the faulted access in bytes.
# addressing modes other than (An)+ and -(An) need no fixup.
14689: restore:
14690: 	mov.b	EXC_OPWORD+0x1(%a6),%d1
14691: 	andi.b	&0x38,%d1		# extract opmode
14692: 	cmpi.b	%d1,&0x18		# postinc?
14693: 	beq.w	rest_inc
14694: 	cmpi.b	%d1,&0x20		# predec?
14695: 	beq.w	rest_dec
14696: 	rts				# other modes: nothing to undo
14697:
# Undo a postincrement: extract the An register number from the opword
# and dispatch through a pc-relative offset table to the matching
# per-register fixup routine (which subtracts d0 from that An).
14698: rest_inc:
14699: 	mov.b	EXC_OPWORD+0x1(%a6),%d1
14700: 	andi.w	&0x0007,%d1		# fetch An
14701: 
14702: 	mov.w	(tbl_rest_inc.b,%pc,%d1.w*2),%d1
14703: 	jmp	(tbl_rest_inc.b,%pc,%d1.w*1)
14704: 
14705: tbl_rest_inc:
14706: 	short	ri_a0 - tbl_rest_inc
14707: 	short	ri_a1 - tbl_rest_inc
14708: 	short	ri_a2 - tbl_rest_inc
14709: 	short	ri_a3 - tbl_rest_inc
14710: 	short	ri_a4 - tbl_rest_inc
14711: 	short	ri_a5 - tbl_rest_inc
14712: 	short	ri_a6 - tbl_rest_inc
14713: 	short	ri_a7 - tbl_rest_inc
14714:
# Per-register fixups: subtract the size adjustment in d0 from An.
# a0/a1 (and a6, our frame pointer) live in the exception save area;
# a2-a5 are still live in the actual registers.
14715: ri_a0:
14716: 	sub.l	%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
14717: 	rts
14718: ri_a1:
14719: 	sub.l	%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
14720: 	rts
14721: ri_a2:
14722: 	sub.l	%d0,%a2			# fix a2
14723: 	rts
14724: ri_a3:
14725: 	sub.l	%d0,%a3			# fix a3
14726: 	rts
14727: ri_a4:
14728: 	sub.l	%d0,%a4			# fix a4
14729: 	rts
14730: ri_a5:
14731: 	sub.l	%d0,%a5			# fix a5
14732: 	rts
14733: ri_a6:
14734: 	sub.l	%d0,(%a6)		# fix stacked a6
14735: 	rts
14736: # if it's a fmove out instruction, we don't have to fix a7
14737: # because we hadn't changed it yet. if it's an opclass two
14738: # instruction (data moved in) and the exception was in supervisor
14739: # mode, then a7 also wasn't updated. if it was user mode, then
14740: # restore the correct a7 which is in the USP currently.
14741: ri_a7:
14742: 	cmpi.b	EXC_VOFF(%a6),&0x30	# move in or out?
14743: 	bne.b	ri_a7_done		# out
14744: 
14745: 	btst	&0x5,EXC_SR(%a6)	# user or supervisor?
14746: 	bne.b	ri_a7_done		# supervisor
14747: 	movc	%usp,%a0		# restore USP
14748: 	sub.l	%d0,%a0			# back out the postincrement
14749: 	movc	%a0,%usp
14750: ri_a7_done:
14751: 	rts
14752:
14753: # need to invert adjustment value if the <ea> was predec
# (the ri_* routines subtract d0, so negating d0 makes them add the
# size back, restoring the pre-decremented An)
14754: rest_dec:
14755: 	neg.l	%d0
14756: 	bra.b	rest_inc
CVSweb