Annotation of sys/arch/m68k/060sp/fplsp.s, Revision 1.1.1.1
1.1 nbrk 1: #
2: # $OpenBSD: fplsp.s,v 1.3 2003/11/07 10:36:08 miod Exp $
3: # $NetBSD: fplsp.s,v 1.2 1996/05/15 19:47:41 is Exp $
4: #
5:
6: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7: # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
8: # M68000 Hi-Performance Microprocessor Division
9: # M68060 Software Package Production Release
10: #
11: # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
12: # All rights reserved.
13: #
14: # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
15: # To the maximum extent permitted by applicable law,
16: # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
17: # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
18: # FOR A PARTICULAR PURPOSE and any warranty against infringement with
19: # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
20: # and any accompanying written materials.
21: #
22: # To the maximum extent permitted by applicable law,
23: # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
24: # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
25: # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
26: # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27: #
28: # Motorola assumes no responsibility for the maintenance and support
29: # of the SOFTWARE.
30: #
31: # You are hereby granted a copyright license to use, modify, and distribute the
32: # SOFTWARE so long as this entire notice is retained without alteration
33: # in any modified and/or redistributed versions, and that such modified
34: # versions are clearly identified as such.
35: # No licenses are granted by implication, estoppel or otherwise under any
36: # patents or trademarks of Motorola, Inc.
37: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38:
39: #
40: # lfptop.s:
41: # This file is appended to the top of the 060FPLSP package
42: # and contains the entry points into the package. The user, in
43: # effect, branches to one of the branch table entries located here.
44: #
45:
46: bra.l _facoss_ # entry table: every slot is one long branch followed by a zero pad word
47: short 0x0000
48: bra.l _facosd_ # suffix s/d/x = routine taking a sgl/dbl/ext argument
49: short 0x0000
50: bra.l _facosx_
51: short 0x0000
52:
53: bra.l _fasins_
54: short 0x0000
55: bra.l _fasind_
56: short 0x0000
57: bra.l _fasinx_
58: short 0x0000
59:
60: bra.l _fatans_
61: short 0x0000
62: bra.l _fatand_
63: short 0x0000
64: bra.l _fatanx_
65: short 0x0000
66:
67: bra.l _fatanhs_
68: short 0x0000
69: bra.l _fatanhd_
70: short 0x0000
71: bra.l _fatanhx_
72: short 0x0000
73:
74: bra.l _fcoss_
75: short 0x0000
76: bra.l _fcosd_
77: short 0x0000
78: bra.l _fcosx_
79: short 0x0000
80:
81: bra.l _fcoshs_
82: short 0x0000
83: bra.l _fcoshd_
84: short 0x0000
85: bra.l _fcoshx_
86: short 0x0000
87:
88: bra.l _fetoxs_
89: short 0x0000
90: bra.l _fetoxd_
91: short 0x0000
92: bra.l _fetoxx_
93: short 0x0000
94:
95: bra.l _fetoxm1s_
96: short 0x0000
97: bra.l _fetoxm1d_
98: short 0x0000
99: bra.l _fetoxm1x_
100: short 0x0000
101:
102: bra.l _fgetexps_
103: short 0x0000
104: bra.l _fgetexpd_
105: short 0x0000
106: bra.l _fgetexpx_
107: short 0x0000
108:
109: bra.l _fgetmans_
110: short 0x0000
111: bra.l _fgetmand_
112: short 0x0000
113: bra.l _fgetmanx_
114: short 0x0000
115:
116: bra.l _flog10s_
117: short 0x0000
118: bra.l _flog10d_
119: short 0x0000
120: bra.l _flog10x_
121: short 0x0000
122:
123: bra.l _flog2s_
124: short 0x0000
125: bra.l _flog2d_
126: short 0x0000
127: bra.l _flog2x_
128: short 0x0000
129:
130: bra.l _flogns_
131: short 0x0000
132: bra.l _flognd_
133: short 0x0000
134: bra.l _flognx_
135: short 0x0000
136:
137: bra.l _flognp1s_
138: short 0x0000
139: bra.l _flognp1d_
140: short 0x0000
141: bra.l _flognp1x_
142: short 0x0000
143:
144: bra.l _fmods_
145: short 0x0000
146: bra.l _fmodd_
147: short 0x0000
148: bra.l _fmodx_
149: short 0x0000
150:
151: bra.l _frems_
152: short 0x0000
153: bra.l _fremd_
154: short 0x0000
155: bra.l _fremx_
156: short 0x0000
157:
158: bra.l _fscales_
159: short 0x0000
160: bra.l _fscaled_
161: short 0x0000
162: bra.l _fscalex_
163: short 0x0000
164:
165: bra.l _fsins_
166: short 0x0000
167: bra.l _fsind_
168: short 0x0000
169: bra.l _fsinx_
170: short 0x0000
171:
172: bra.l _fsincoss_
173: short 0x0000
174: bra.l _fsincosd_
175: short 0x0000
176: bra.l _fsincosx_
177: short 0x0000
178:
179: bra.l _fsinhs_
180: short 0x0000
181: bra.l _fsinhd_
182: short 0x0000
183: bra.l _fsinhx_
184: short 0x0000
185:
186: bra.l _ftans_
187: short 0x0000
188: bra.l _ftand_
189: short 0x0000
190: bra.l _ftanx_
191: short 0x0000
192:
193: bra.l _ftanhs_
194: short 0x0000
195: bra.l _ftanhd_
196: short 0x0000
197: bra.l _ftanhx_
198: short 0x0000
199:
200: bra.l _ftentoxs_
201: short 0x0000
202: bra.l _ftentoxd_
203: short 0x0000
204: bra.l _ftentoxx_
205: short 0x0000
206:
207: bra.l _ftwotoxs_
208: short 0x0000
209: bra.l _ftwotoxd_
210: short 0x0000
211: bra.l _ftwotoxx_
212: short 0x0000
213:
214: bra.l _fabss_
215: short 0x0000
216: bra.l _fabsd_
217: short 0x0000
218: bra.l _fabsx_
219: short 0x0000
220:
221: bra.l _fadds_
222: short 0x0000
223: bra.l _faddd_
224: short 0x0000
225: bra.l _faddx_
226: short 0x0000
227:
228: bra.l _fdivs_
229: short 0x0000
230: bra.l _fdivd_
231: short 0x0000
232: bra.l _fdivx_
233: short 0x0000
234:
235: bra.l _fints_
236: short 0x0000
237: bra.l _fintd_
238: short 0x0000
239: bra.l _fintx_
240: short 0x0000
241:
242: bra.l _fintrzs_
243: short 0x0000
244: bra.l _fintrzd_
245: short 0x0000
246: bra.l _fintrzx_
247: short 0x0000
248:
249: bra.l _fmuls_
250: short 0x0000
251: bra.l _fmuld_
252: short 0x0000
253: bra.l _fmulx_
254: short 0x0000
255:
256: bra.l _fnegs_
257: short 0x0000
258: bra.l _fnegd_
259: short 0x0000
260: bra.l _fnegx_
261: short 0x0000
262:
263: bra.l _fsqrts_
264: short 0x0000
265: bra.l _fsqrtd_
266: short 0x0000
267: bra.l _fsqrtx_
268: short 0x0000
269:
270: bra.l _fsubs_
271: short 0x0000
272: bra.l _fsubd_
273: short 0x0000
274: bra.l _fsubx_
275: short 0x0000
276:
277: # leave room for future possible additions
278: align 0x400 # presumably pads the table to a 0x400 boundary -- confirm the assembler's align semantics
279:
280: #
281: # This file contains a set of define statements for constants
282: # in order to promote readability within the corecode itself.
283: #
284:
285: set LOCAL_SIZE, 192 # stack frame size(bytes)
286: set LV, -LOCAL_SIZE # stack offset: lowest address of the frame, relative to %a6
287:
288: set EXC_SR, 0x4 # stack status register
289: set EXC_PC, 0x6 # stack pc
290: set EXC_VOFF, 0xa # stacked vector offset
291: set EXC_EA, 0xc # stacked <ea>
292:
293: set EXC_FP, 0x0 # frame pointer
294:
295: set EXC_AREGS, -68 # offset of all address regs
296: set EXC_DREGS, -100 # offset of all data regs
297: set EXC_FPREGS, -36 # offset of all fp regs
298:
299: set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
300: set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -- NOTE(review): same offset as EXC_A6, not EXC_A7; confirm intended
301: set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
302: set EXC_A5, EXC_AREGS+(5*4)
303: set EXC_A4, EXC_AREGS+(4*4)
304: set EXC_A3, EXC_AREGS+(3*4)
305: set EXC_A2, EXC_AREGS+(2*4)
306: set EXC_A1, EXC_AREGS+(1*4)
307: set EXC_A0, EXC_AREGS+(0*4)
308: set EXC_D7, EXC_DREGS+(7*4)
309: set EXC_D6, EXC_DREGS+(6*4)
310: set EXC_D5, EXC_DREGS+(5*4)
311: set EXC_D4, EXC_DREGS+(4*4)
312: set EXC_D3, EXC_DREGS+(3*4)
313: set EXC_D2, EXC_DREGS+(2*4)
314: set EXC_D1, EXC_DREGS+(1*4)
315: set EXC_D0, EXC_DREGS+(0*4)
316:
317: set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
318: set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
319: set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
320:
321: set FP_SCR1, LV+80 # fp scratch 1
322: set FP_SCR1_EX, FP_SCR1+0
323: set FP_SCR1_SGN, FP_SCR1+2
324: set FP_SCR1_HI, FP_SCR1+4
325: set FP_SCR1_LO, FP_SCR1+8
326:
327: set FP_SCR0, LV+68 # fp scratch 0
328: set FP_SCR0_EX, FP_SCR0+0
329: set FP_SCR0_SGN, FP_SCR0+2
330: set FP_SCR0_HI, FP_SCR0+4
331: set FP_SCR0_LO, FP_SCR0+8
332:
333: set FP_DST, LV+56 # fp destination operand
334: set FP_DST_EX, FP_DST+0
335: set FP_DST_SGN, FP_DST+2
336: set FP_DST_HI, FP_DST+4
337: set FP_DST_LO, FP_DST+8
338:
339: set FP_SRC, LV+44 # fp source operand
340: set FP_SRC_EX, FP_SRC+0
341: set FP_SRC_SGN, FP_SRC+2
342: set FP_SRC_HI, FP_SRC+4
343: set FP_SRC_LO, FP_SRC+8
344:
345: set USER_FPIAR, LV+40 # FP instr address register
346:
347: set USER_FPSR, LV+36 # FP status register
348: set FPSR_CC, USER_FPSR+0 # FPSR condition codes
349: set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
350: set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
351: set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
352:
353: set USER_FPCR, LV+32 # FP control register
354: set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
355: set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
356:
357: set L_SCR3, LV+28 # integer scratch 3
358: set L_SCR2, LV+24 # integer scratch 2
359: set L_SCR1, LV+20 # integer scratch 1
360:
361: set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
362:
363: set EXC_TEMP2, LV+24 # temporary space (same offset as L_SCR2)
364: set EXC_TEMP, LV+16 # temporary space
365:
366: set DTAG, LV+15 # destination operand type
367: set STAG, LV+14 # source operand type
368:
369: set SPCOND_FLG, LV+10 # flag: special case (see below)
370:
371: set EXC_CC, LV+8 # saved condition codes
372: set EXC_EXTWPTR, LV+4 # saved current PC (active)
373: set EXC_EXTWORD, LV+2 # saved extension word
374: set EXC_CMDREG, LV+2 # saved extension word (same offset as EXC_EXTWORD)
375: set EXC_OPWORD, LV+0 # saved operation word
376:
377: ################################
378:
379: # Helpful macros
380:
381: set FTEMP, 0 # offsets within an
382: set FTEMP_EX, 0 # extended precision
383: set FTEMP_SGN, 2 # value saved in memory.
384: set FTEMP_HI, 4
385: set FTEMP_LO, 8
386: set FTEMP_GRS, 12 # first byte past the 12-byte value (presumably guard/round/sticky -- confirm)
387:
388: set LOCAL, 0 # offsets within an
389: set LOCAL_EX, 0 # extended precision
390: set LOCAL_SGN, 2 # value saved in memory.
391: set LOCAL_HI, 4
392: set LOCAL_LO, 8
393: set LOCAL_GRS, 12
394:
395: set DST, 0 # offsets within an
396: set DST_EX, 0 # extended precision
397: set DST_HI, 4 # value saved in memory.
398: set DST_LO, 8
399:
400: set SRC, 0 # offsets within an
401: set SRC_EX, 0 # extended precision
402: set SRC_HI, 4 # value saved in memory.
403: set SRC_LO, 8
404:
405: set SGL_LO, 0x3f81 # min sgl prec exponent (EXT_BIAS - SGL_BIAS + 1)
406: set SGL_HI, 0x407e # max sgl prec exponent (EXT_BIAS + SGL_BIAS)
407: set DBL_LO, 0x3c01 # min dbl prec exponent (EXT_BIAS - DBL_BIAS + 1)
408: set DBL_HI, 0x43fe # max dbl prec exponent (EXT_BIAS + DBL_BIAS)
409: set EXT_LO, 0x0 # min ext prec exponent
410: set EXT_HI, 0x7ffe # max ext prec exponent
411:
412: set EXT_BIAS, 0x3fff # extended precision bias
413: set SGL_BIAS, 0x007f # single precision bias
414: set DBL_BIAS, 0x03ff # double precision bias
415:
416: set NORM, 0x00 # operand type for STAG/DTAG
417: set ZERO, 0x01 # operand type for STAG/DTAG
418: set INF, 0x02 # operand type for STAG/DTAG
419: set QNAN, 0x03 # operand type for STAG/DTAG
420: set DENORM, 0x04 # operand type for STAG/DTAG
421: set SNAN, 0x05 # operand type for STAG/DTAG
422: set UNNORM, 0x06 # operand type for STAG/DTAG
423:
424: ##################
425: # FPSR/FPCR bits #
426: ##################
427: set neg_bit, 0x3 # negative result (bit number within the condition code byte)
428: set z_bit, 0x2 # zero result
429: set inf_bit, 0x1 # infinite result
430: set nan_bit, 0x0 # NAN result
431:
432: set q_sn_bit, 0x7 # sign bit of quotient byte
433:
434: set bsun_bit, 7 # branch on unordered (bit number within the exception status byte)
435: set snan_bit, 6 # signalling NAN
436: set operr_bit, 5 # operand error
437: set ovfl_bit, 4 # overflow
438: set unfl_bit, 3 # underflow
439: set dz_bit, 2 # divide by zero
440: set inex2_bit, 1 # inexact result 2
441: set inex1_bit, 0 # inexact result 1
442:
443: set aiop_bit, 7 # accrued invalid operation bit
444: set aovfl_bit, 6 # accrued overflow bit
445: set aunfl_bit, 5 # accrued underflow bit
446: set adz_bit, 4 # accrued dz bit
447: set ainex_bit, 3 # accrued inexact bit
448:
449: #############################
450: # FPSR individual bit masks #
451: #############################
452: set neg_mask, 0x08000000 # negative bit mask (lw)
453: set inf_mask, 0x02000000 # infinity bit mask (lw)
454: set z_mask, 0x04000000 # zero bit mask (lw)
455: set nan_mask, 0x01000000 # nan bit mask (lw)
456:
457: set neg_bmask, 0x08 # negative bit mask (byte)
458: set inf_bmask, 0x02 # infinity bit mask (byte)
459: set z_bmask, 0x04 # zero bit mask (byte)
460: set nan_bmask, 0x01 # nan bit mask (byte)
461:
462: set bsun_mask, 0x00008000 # bsun exception mask
463: set snan_mask, 0x00004000 # snan exception mask
464: set operr_mask, 0x00002000 # operr exception mask
465: set ovfl_mask, 0x00001000 # overflow exception mask
466: set unfl_mask, 0x00000800 # underflow exception mask
467: set dz_mask, 0x00000400 # dz exception mask
468: set inex2_mask, 0x00000200 # inex2 exception mask
469: set inex1_mask, 0x00000100 # inex1 exception mask
470:
471: set aiop_mask, 0x00000080 # accrued invalid operation
472: set aovfl_mask, 0x00000040 # accrued overflow
473: set aunfl_mask, 0x00000020 # accrued underflow
474: set adz_mask, 0x00000010 # accrued divide by zero
475: set ainex_mask, 0x00000008 # accrued inexact
476:
477: ######################################
478: # FPSR combinations used in the FPSP #
479: ######################################
480: set dzinf_mask, inf_mask+dz_mask+adz_mask
481: set opnan_mask, nan_mask+operr_mask+aiop_mask
482: set nzi_mask, 0x01ffffff #clears N, Z, and I
483: set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
484: set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
485: set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
486: set inx1a_mask, inex1_mask+ainex_mask
487: set inx2a_mask, inex2_mask+ainex_mask
488: set snaniop_mask, nan_mask+snan_mask+aiop_mask
489: set snaniop2_mask, snan_mask+aiop_mask
490: set naniop_mask, nan_mask+aiop_mask
491: set neginf_mask, neg_mask+inf_mask
492: set infaiop_mask, inf_mask+aiop_mask
493: set negz_mask, neg_mask+z_mask
494: set opaop_mask, operr_mask+aiop_mask
495: set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
496: set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
497:
498: #########
499: # misc. #
500: #########
501: set rnd_stky_bit, 29 # stky bit pos in longword
502:
503: set sign_bit, 0x7 # sign bit
504: set signan_bit, 0x6 # signalling nan bit
505:
506: set sgl_thresh, 0x3f81 # minimum sgl exponent (same value as SGL_LO)
507: set dbl_thresh, 0x3c01 # minimum dbl exponent (same value as DBL_LO)
508:
509: set x_mode, 0x0 # extended precision
510: set s_mode, 0x4 # single precision
511: set d_mode, 0x8 # double precision
512:
513: set rn_mode, 0x0 # round-to-nearest
514: set rz_mode, 0x1 # round-to-zero
515: set rm_mode, 0x2 # round-to-minus-infinity
516: set rp_mode, 0x3 # round-to-plus-infinity
517:
518: set mantissalen, 64 # length of mantissa in bits
519:
520: set BYTE, 1 # len(byte) == 1 byte
521: set WORD, 2 # len(word) == 2 bytes
522: set LONG, 4 # len(longword) == 4 bytes
523:
524: set BSUN_VEC, 0xc0 # bsun vector offset
525: set INEX_VEC, 0xc4 # inexact vector offset
526: set DZ_VEC, 0xc8 # dz vector offset
527: set UNFL_VEC, 0xcc # unfl vector offset
528: set OPERR_VEC, 0xd0 # operr vector offset
529: set OVFL_VEC, 0xd4 # ovfl vector offset
530: set SNAN_VEC, 0xd8 # snan vector offset
531:
532: ###########################
533: # SPecial CONDition FLaGs #
534: ###########################
535: set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
536: set fbsun_flg, 0x02 # flag bit: bsun exception
537: set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
538: set mda7_flg, 0x08 # flag bit: -(a7) <ea>
539: set fmovm_flg, 0x40 # flag bit: fmovm instruction
540: set immed_flg, 0x80 # flag bit: &<data> <ea>
541:
542: set ftrapcc_bit, 0x0 # bit number matching ftrapcc_flg
543: set fbsun_bit, 0x1 # bit number matching fbsun_flg
544: set mia7_bit, 0x2 # bit number matching mia7_flg
545: set mda7_bit, 0x3 # bit number matching mda7_flg
546: set immed_bit, 0x7 # bit number matching immed_flg
547:
548: ##################################
549: # TRANSCENDENTAL "LAST-OP" FLAGS #
550: ##################################
551: set FMUL_OP, 0x0 # fmul instr performed last
552: set FDIV_OP, 0x1 # fdiv performed last
553: set FADD_OP, 0x2 # fadd performed last
554: set FMOV_OP, 0x3 # fmov performed last
555:
556: #############
557: # CONSTANTS #
558: #############
559: T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD (double-precision bit pattern, about 16381*ln 2)
560: T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL (double-precision bit pattern, low-order correction)
561:
562: PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 # pi: exponent word, 64-bit mantissa, then a zero longword
563: PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # pi/2: same mantissa as PI, exponent one lower
564:
565: TWOBYPI:
566: long 0x3FE45F30,0x6DC9C883 # 2/pi as a double-precision bit pattern
567:
568: #########################################################################
569: # MONADIC TEMPLATE #
570: #########################################################################
571: global _fsins_ # sgl-argument wrapper: tags the operand, then calls ssin/ssind or a special-case handler
572: _fsins_:
573: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
574:
575: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
576: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
577: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
578:
579: fmov.l &0x0,%fpcr # zero FPCR
580:
581: #
582: # copy, convert, and tag input argument
583: #
584: fmov.s 0x8(%a6),%fp0 # load sgl input
585: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
586: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
587: bsr.l tag # fetch operand type
588: mov.b %d0,STAG(%a6) # record the type byte returned in d0
589: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
590:
591: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
592:
593: clr.l %d0 # zero d0 so only its low byte carries the mode
594: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
595:
596: tst.b %d1 # NORM is tag value 0x00
597: bne.b _L0_2s
598: bsr.l ssin # operand is a NORM
599: bra.b _L0_6s
600: _L0_2s:
601: cmpi.b %d1,&ZERO # is operand a ZERO?
602: bne.b _L0_3s # no
603: bsr.l src_zero # yes
604: bra.b _L0_6s
605: _L0_3s:
606: cmpi.b %d1,&INF # is operand an INF?
607: bne.b _L0_4s # no
608: bsr.l t_operr # yes
609: bra.b _L0_6s
610: _L0_4s:
611: cmpi.b %d1,&QNAN # is operand a QNAN?
612: bne.b _L0_5s # no
613: bsr.l src_qnan # yes
614: bra.b _L0_6s
615: _L0_5s:
616: bsr.l ssind # operand is a DENORM (every remaining tag value also lands here)
617: _L0_6s:
618:
619: #
620: # Result is now in FP0
621: #
622: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
623: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
624: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
625: unlk %a6
626: rts
627:
628: global _fsind_ # dbl-argument wrapper: tags the operand, then calls ssin/ssind or a special-case handler
629: _fsind_:
630: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
631:
632: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
633: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
634: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
635:
636: fmov.l &0x0,%fpcr # zero FPCR
637:
638: #
639: # copy, convert, and tag input argument
640: #
641: fmov.d 0x8(%a6),%fp0 # load dbl input
642: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
643: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
644: bsr.l tag # fetch operand type
645: mov.b %d0,STAG(%a6) # record the type byte returned in d0
646: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
647:
648: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
649:
650: clr.l %d0 # zero d0 so only its low byte carries the mode
651: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
652:
653: mov.b %d1,STAG(%a6) # NOTE(review): repeats the STAG store made after tag; d1 is unchanged in between
654: tst.b %d1 # NORM is tag value 0x00
655: bne.b _L0_2d
656: bsr.l ssin # operand is a NORM
657: bra.b _L0_6d
658: _L0_2d:
659: cmpi.b %d1,&ZERO # is operand a ZERO?
660: bne.b _L0_3d # no
661: bsr.l src_zero # yes
662: bra.b _L0_6d
663: _L0_3d:
664: cmpi.b %d1,&INF # is operand an INF?
665: bne.b _L0_4d # no
666: bsr.l t_operr # yes
667: bra.b _L0_6d
668: _L0_4d:
669: cmpi.b %d1,&QNAN # is operand a QNAN?
670: bne.b _L0_5d # no
671: bsr.l src_qnan # yes
672: bra.b _L0_6d
673: _L0_5d:
674: bsr.l ssind # operand is a DENORM (every remaining tag value also lands here)
675: _L0_6d:
676:
677: #
678: # Result is now in FP0
679: #
680: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
681: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
682: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
683: unlk %a6
684: rts
685:
686: global _fsinx_ # ext-argument wrapper: tags the operand, then calls ssin/ssind or a special-case handler
687: _fsinx_:
688: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
689:
690: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
691: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
692: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
693:
694: fmov.l &0x0,%fpcr # zero FPCR
695:
696: #
697: # copy, convert, and tag input argument
698: #
699: lea FP_SRC(%a6),%a0 # a0 = address of the source operand slot
700: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
701: mov.l 0x8+0x4(%a6),0x4(%a0) # second of three longwords copied unchanged
702: mov.l 0x8+0x8(%a6),0x8(%a0) # third longword; 12 bytes copied in total
703: bsr.l tag # fetch operand type
704: mov.b %d0,STAG(%a6) # record the type byte returned in d0
705: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
706:
707: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
708:
709: clr.l %d0 # zero d0 so only its low byte carries the mode
710: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
711:
712: tst.b %d1 # NORM is tag value 0x00
713: bne.b _L0_2x
714: bsr.l ssin # operand is a NORM
715: bra.b _L0_6x
716: _L0_2x:
717: cmpi.b %d1,&ZERO # is operand a ZERO?
718: bne.b _L0_3x # no
719: bsr.l src_zero # yes
720: bra.b _L0_6x
721: _L0_3x:
722: cmpi.b %d1,&INF # is operand an INF?
723: bne.b _L0_4x # no
724: bsr.l t_operr # yes
725: bra.b _L0_6x
726: _L0_4x:
727: cmpi.b %d1,&QNAN # is operand a QNAN?
728: bne.b _L0_5x # no
729: bsr.l src_qnan # yes
730: bra.b _L0_6x
731: _L0_5x:
732: bsr.l ssind # operand is a DENORM (every remaining tag value also lands here)
733: _L0_6x:
734:
735: #
736: # Result is now in FP0
737: #
738: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
739: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
740: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
741: unlk %a6
742: rts
743:
744:
745: #########################################################################
746: # MONADIC TEMPLATE #
747: #########################################################################
748: global _fcoss_ # sgl-argument wrapper: tags the operand, then calls scos/scosd or a special-case handler
749: _fcoss_:
750: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
751:
752: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
753: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
754: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
755:
756: fmov.l &0x0,%fpcr # zero FPCR
757:
758: #
759: # copy, convert, and tag input argument
760: #
761: fmov.s 0x8(%a6),%fp0 # load sgl input
762: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
763: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
764: bsr.l tag # fetch operand type
765: mov.b %d0,STAG(%a6) # record the type byte returned in d0
766: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
767:
768: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
769:
770: clr.l %d0 # zero d0 so only its low byte carries the mode
771: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
772:
773: tst.b %d1 # NORM is tag value 0x00
774: bne.b _L1_2s
775: bsr.l scos # operand is a NORM
776: bra.b _L1_6s
777: _L1_2s:
778: cmpi.b %d1,&ZERO # is operand a ZERO?
779: bne.b _L1_3s # no
780: bsr.l ld_pone # yes
781: bra.b _L1_6s
782: _L1_3s:
783: cmpi.b %d1,&INF # is operand an INF?
784: bne.b _L1_4s # no
785: bsr.l t_operr # yes
786: bra.b _L1_6s
787: _L1_4s:
788: cmpi.b %d1,&QNAN # is operand a QNAN?
789: bne.b _L1_5s # no
790: bsr.l src_qnan # yes
791: bra.b _L1_6s
792: _L1_5s:
793: bsr.l scosd # operand is a DENORM (every remaining tag value also lands here)
794: _L1_6s:
795:
796: #
797: # Result is now in FP0
798: #
799: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
800: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
801: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
802: unlk %a6
803: rts
804:
805: global _fcosd_ # dbl-argument wrapper: tags the operand, then calls scos/scosd or a special-case handler
806: _fcosd_:
807: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
808:
809: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
810: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
811: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
812:
813: fmov.l &0x0,%fpcr # zero FPCR
814:
815: #
816: # copy, convert, and tag input argument
817: #
818: fmov.d 0x8(%a6),%fp0 # load dbl input
819: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
820: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
821: bsr.l tag # fetch operand type
822: mov.b %d0,STAG(%a6) # record the type byte returned in d0
823: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
824:
825: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
826:
827: clr.l %d0 # zero d0 so only its low byte carries the mode
828: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
829:
830: mov.b %d1,STAG(%a6) # NOTE(review): repeats the STAG store made after tag; d1 is unchanged in between
831: tst.b %d1 # NORM is tag value 0x00
832: bne.b _L1_2d
833: bsr.l scos # operand is a NORM
834: bra.b _L1_6d
835: _L1_2d:
836: cmpi.b %d1,&ZERO # is operand a ZERO?
837: bne.b _L1_3d # no
838: bsr.l ld_pone # yes
839: bra.b _L1_6d
840: _L1_3d:
841: cmpi.b %d1,&INF # is operand an INF?
842: bne.b _L1_4d # no
843: bsr.l t_operr # yes
844: bra.b _L1_6d
845: _L1_4d:
846: cmpi.b %d1,&QNAN # is operand a QNAN?
847: bne.b _L1_5d # no
848: bsr.l src_qnan # yes
849: bra.b _L1_6d
850: _L1_5d:
851: bsr.l scosd # operand is a DENORM (every remaining tag value also lands here)
852: _L1_6d:
853:
854: #
855: # Result is now in FP0
856: #
857: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
858: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
859: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
860: unlk %a6
861: rts
862:
863: global _fcosx_ # ext-argument wrapper: tags the operand, then calls scos/scosd or a special-case handler
864: _fcosx_:
865: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
866:
867: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
868: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
869: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
870:
871: fmov.l &0x0,%fpcr # zero FPCR
872:
873: #
874: # copy, convert, and tag input argument
875: #
876: lea FP_SRC(%a6),%a0 # a0 = address of the source operand slot
877: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
878: mov.l 0x8+0x4(%a6),0x4(%a0) # second of three longwords copied unchanged
879: mov.l 0x8+0x8(%a6),0x8(%a0) # third longword; 12 bytes copied in total
880: bsr.l tag # fetch operand type
881: mov.b %d0,STAG(%a6) # record the type byte returned in d0
882: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
883:
884: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
885:
886: clr.l %d0 # zero d0 so only its low byte carries the mode
887: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
888:
889: tst.b %d1 # NORM is tag value 0x00
890: bne.b _L1_2x
891: bsr.l scos # operand is a NORM
892: bra.b _L1_6x
893: _L1_2x:
894: cmpi.b %d1,&ZERO # is operand a ZERO?
895: bne.b _L1_3x # no
896: bsr.l ld_pone # yes
897: bra.b _L1_6x
898: _L1_3x:
899: cmpi.b %d1,&INF # is operand an INF?
900: bne.b _L1_4x # no
901: bsr.l t_operr # yes
902: bra.b _L1_6x
903: _L1_4x:
904: cmpi.b %d1,&QNAN # is operand a QNAN?
905: bne.b _L1_5x # no
906: bsr.l src_qnan # yes
907: bra.b _L1_6x
908: _L1_5x:
909: bsr.l scosd # operand is a DENORM (every remaining tag value also lands here)
910: _L1_6x:
911:
912: #
913: # Result is now in FP0
914: #
915: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
916: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
917: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
918: unlk %a6
919: rts
920:
921:
922: #########################################################################
923: # MONADIC TEMPLATE #
924: #########################################################################
925: global _fsinhs_ # sgl-argument wrapper: tags the operand, then calls ssinh/ssinhd or a special-case handler
926: _fsinhs_:
927: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
928:
929: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
930: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
931: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
932:
933: fmov.l &0x0,%fpcr # zero FPCR
934:
935: #
936: # copy, convert, and tag input argument
937: #
938: fmov.s 0x8(%a6),%fp0 # load sgl input
939: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
940: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
941: bsr.l tag # fetch operand type
942: mov.b %d0,STAG(%a6) # record the type byte returned in d0
943: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
944:
945: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
946:
947: clr.l %d0 # zero d0 so only its low byte carries the mode
948: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
949:
950: tst.b %d1 # NORM is tag value 0x00
951: bne.b _L2_2s
952: bsr.l ssinh # operand is a NORM
953: bra.b _L2_6s
954: _L2_2s:
955: cmpi.b %d1,&ZERO # is operand a ZERO?
956: bne.b _L2_3s # no
957: bsr.l src_zero # yes
958: bra.b _L2_6s
959: _L2_3s:
960: cmpi.b %d1,&INF # is operand an INF?
961: bne.b _L2_4s # no
962: bsr.l src_inf # yes
963: bra.b _L2_6s
964: _L2_4s:
965: cmpi.b %d1,&QNAN # is operand a QNAN?
966: bne.b _L2_5s # no
967: bsr.l src_qnan # yes
968: bra.b _L2_6s
969: _L2_5s:
970: bsr.l ssinhd # operand is a DENORM (every remaining tag value also lands here)
971: _L2_6s:
972:
973: #
974: # Result is now in FP0
975: #
976: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
977: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
978: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
979: unlk %a6
980: rts
981:
982: global _fsinhd_ # dbl-argument wrapper: tags the operand, then calls ssinh/ssinhd or a special-case handler
983: _fsinhd_:
984: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
985:
986: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
987: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
988: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
989:
990: fmov.l &0x0,%fpcr # zero FPCR
991:
992: #
993: # copy, convert, and tag input argument
994: #
995: fmov.d 0x8(%a6),%fp0 # load dbl input
996: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
997: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
998: bsr.l tag # fetch operand type
999: mov.b %d0,STAG(%a6) # record the type byte returned in d0
1000: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
1001:
1002: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
1003:
1004: clr.l %d0 # zero d0 so only its low byte carries the mode
1005: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1006:
1007: mov.b %d1,STAG(%a6) # NOTE(review): repeats the STAG store made after tag; d1 is unchanged in between
1008: tst.b %d1 # NORM is tag value 0x00
1009: bne.b _L2_2d
1010: bsr.l ssinh # operand is a NORM
1011: bra.b _L2_6d
1012: _L2_2d:
1013: cmpi.b %d1,&ZERO # is operand a ZERO?
1014: bne.b _L2_3d # no
1015: bsr.l src_zero # yes
1016: bra.b _L2_6d
1017: _L2_3d:
1018: cmpi.b %d1,&INF # is operand an INF?
1019: bne.b _L2_4d # no
1020: bsr.l src_inf # yes
1021: bra.b _L2_6d
1022: _L2_4d:
1023: cmpi.b %d1,&QNAN # is operand a QNAN?
1024: bne.b _L2_5d # no
1025: bsr.l src_qnan # yes
1026: bra.b _L2_6d
1027: _L2_5d:
1028: bsr.l ssinhd # operand is a DENORM (every remaining tag value also lands here)
1029: _L2_6d:
1030:
1031: #
1032: # Result is now in FP0
1033: #
1034: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1035: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
1036: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
1037: unlk %a6
1038: rts
1039:
1040: global _fsinhx_ # ext-argument wrapper: tags the operand, then calls ssinh/ssinhd or a special-case handler
1041: _fsinhx_:
1042: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
1043:
1044: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1045: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1046: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1047:
1048: fmov.l &0x0,%fpcr # zero FPCR
1049:
1050: #
1051: # copy, convert, and tag input argument
1052: #
1053: lea FP_SRC(%a6),%a0 # a0 = address of the source operand slot
1054: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1055: mov.l 0x8+0x4(%a6),0x4(%a0) # second of three longwords copied unchanged
1056: mov.l 0x8+0x8(%a6),0x8(%a0) # third longword; 12 bytes copied in total
1057: bsr.l tag # fetch operand type
1058: mov.b %d0,STAG(%a6) # record the type byte returned in d0
1059: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
1060:
1061: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
1062:
1063: clr.l %d0 # zero d0 so only its low byte carries the mode
1064: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1065:
1066: tst.b %d1 # NORM is tag value 0x00
1067: bne.b _L2_2x
1068: bsr.l ssinh # operand is a NORM
1069: bra.b _L2_6x
1070: _L2_2x:
1071: cmpi.b %d1,&ZERO # is operand a ZERO?
1072: bne.b _L2_3x # no
1073: bsr.l src_zero # yes
1074: bra.b _L2_6x
1075: _L2_3x:
1076: cmpi.b %d1,&INF # is operand an INF?
1077: bne.b _L2_4x # no
1078: bsr.l src_inf # yes
1079: bra.b _L2_6x
1080: _L2_4x:
1081: cmpi.b %d1,&QNAN # is operand a QNAN?
1082: bne.b _L2_5x # no
1083: bsr.l src_qnan # yes
1084: bra.b _L2_6x
1085: _L2_5x:
1086: bsr.l ssinhd # operand is a DENORM (every remaining tag value also lands here)
1087: _L2_6x:
1088:
1089: #
1090: # Result is now in FP0
1091: #
1092: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1093: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
1094: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
1095: unlk %a6
1096: rts
1097:
1098:
1099: #########################################################################
1100: # MONADIC TEMPLATE #
1101: #########################################################################
1102: global _flognp1s_ # sgl-argument wrapper: tags the operand, then calls slognp1/slognp1d or a special-case handler
1103: _flognp1s_:
1104: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
1105:
1106: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1107: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1108: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1109:
1110: fmov.l &0x0,%fpcr # zero FPCR
1111:
1112: #
1113: # copy, convert, and tag input argument
1114: #
1115: fmov.s 0x8(%a6),%fp0 # load sgl input
1116: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
1117: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
1118: bsr.l tag # fetch operand type
1119: mov.b %d0,STAG(%a6) # record the type byte returned in d0
1120: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
1121:
1122: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
1123:
1124: clr.l %d0 # zero d0 so only its low byte carries the mode
1125: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1126:
1127: tst.b %d1 # NORM is tag value 0x00
1128: bne.b _L3_2s
1129: bsr.l slognp1 # operand is a NORM
1130: bra.b _L3_6s
1131: _L3_2s:
1132: cmpi.b %d1,&ZERO # is operand a ZERO?
1133: bne.b _L3_3s # no
1134: bsr.l src_zero # yes
1135: bra.b _L3_6s
1136: _L3_3s:
1137: cmpi.b %d1,&INF # is operand an INF?
1138: bne.b _L3_4s # no
1139: bsr.l sopr_inf # yes
1140: bra.b _L3_6s
1141: _L3_4s:
1142: cmpi.b %d1,&QNAN # is operand a QNAN?
1143: bne.b _L3_5s # no
1144: bsr.l src_qnan # yes
1145: bra.b _L3_6s
1146: _L3_5s:
1147: bsr.l slognp1d # operand is a DENORM (every remaining tag value also lands here)
1148: _L3_6s:
1149:
1150: #
1151: # Result is now in FP0
1152: #
1153: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1154: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
1155: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
1156: unlk %a6
1157: rts
1158:
1159: global _flognp1d_ # dbl-argument wrapper: tags the operand, then calls slognp1/slognp1d or a special-case handler
1160: _flognp1d_:
1161: link %a6,&-LOCAL_SIZE # build the LOCAL_SIZE-byte work frame addressed through %a6
1162:
1163: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1164: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1165: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1166:
1167: fmov.l &0x0,%fpcr # zero FPCR
1168:
1169: #
1170: # copy, convert, and tag input argument
1171: #
1172: fmov.d 0x8(%a6),%fp0 # load dbl input
1173: fmov.x %fp0,FP_SRC(%a6) # store it in extended format as the source operand
1174: lea FP_SRC(%a6),%a0 # a0 = address of the source operand
1175: bsr.l tag # fetch operand type
1176: mov.b %d0,STAG(%a6) # record the type byte returned in d0
1177: mov.b %d0,%d1 # keep the type in d1; d0 is reloaded below
1178:
1179: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear the saved FPSR cc and exception status bytes
1180:
1181: clr.l %d0 # zero d0 so only its low byte carries the mode
1182: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1183:
1184: mov.b %d1,STAG(%a6) # NOTE(review): repeats the STAG store made after tag; d1 is unchanged in between
1185: tst.b %d1 # NORM is tag value 0x00
1186: bne.b _L3_2d
1187: bsr.l slognp1 # operand is a NORM
1188: bra.b _L3_6d
1189: _L3_2d:
1190: cmpi.b %d1,&ZERO # is operand a ZERO?
1191: bne.b _L3_3d # no
1192: bsr.l src_zero # yes
1193: bra.b _L3_6d
1194: _L3_3d:
1195: cmpi.b %d1,&INF # is operand an INF?
1196: bne.b _L3_4d # no
1197: bsr.l sopr_inf # yes
1198: bra.b _L3_6d
1199: _L3_4d:
1200: cmpi.b %d1,&QNAN # is operand a QNAN?
1201: bne.b _L3_5d # no
1202: bsr.l src_qnan # yes
1203: bra.b _L3_6d
1204: _L3_5d:
1205: bsr.l slognp1d # operand is a DENORM (every remaining tag value also lands here)
1206: _L3_6d:
1207:
1208: #
1209: # Result is now in FP0
1210: #
1211: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1212: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs (from the frame copy masked above)
1213: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
1214: unlk %a6
1215: rts
1216:
# _flognp1x_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> slognp1,
# ZERO -> src_zero, INF -> sopr_inf, QNAN -> src_qnan, any other tag ->
# slognp1d.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FLOGNP1 (ln(1+x)), judging by
# the name only -- the arithmetic is in slognp1/slognp1d, not shown here.
1217: global _flognp1x_
1218: _flognp1x_:
1219: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1220:
1221: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1222: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1223: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1224:
1225: fmov.l &0x0,%fpcr # zero FPCR
1226:
1227: #
1228: # copy, convert, and tag input argument
1229: #
1230: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1231: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1232: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
1233: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
1234: bsr.l tag # fetch operand type
1235: mov.b %d0,STAG(%a6) # record tag in STAG
1236: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1237:
1238: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1239:
1240: clr.l %d0 # clear d0 before byte load
1241: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1242:
1243: tst.b %d1 # tag zero (NORM)?
1244: bne.b _L3_2x # no
1245: bsr.l slognp1 # operand is a NORM
1246: bra.b _L3_6x
1247: _L3_2x:
1248: cmpi.b %d1,&ZERO # is operand a ZERO?
1249: bne.b _L3_3x # no
1250: bsr.l src_zero # yes
1251: bra.b _L3_6x
1252: _L3_3x:
1253: cmpi.b %d1,&INF # is operand an INF?
1254: bne.b _L3_4x # no
1255: bsr.l sopr_inf # yes
1256: bra.b _L3_6x
1257: _L3_4x:
1258: cmpi.b %d1,&QNAN # is operand a QNAN?
1259: bne.b _L3_5x # no
1260: bsr.l src_qnan # yes
1261: bra.b _L3_6x
1262: _L3_5x:
1263: bsr.l slognp1d # operand is a DENORM (or any other tag)
1264: _L3_6x:
1265:
1266: #
1267: # Result is now in FP0
1268: #
1269: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1270: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1271: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1272: unlk %a6
1273: rts
1274:
1275:
1276: #########################################################################
1277: # MONADIC TEMPLATE #
1278: #########################################################################
# _fetoxm1s_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> setoxm1, ZERO -> src_zero,
# INF -> setoxm1i, QNAN -> src_qnan, any other tag -> setoxm1d.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FETOXM1 (e^x - 1), judging by
# the name only -- the arithmetic is in setoxm1/setoxm1d, not shown here.
1279: global _fetoxm1s_
1280: _fetoxm1s_:
1281: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1282:
1283: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1284: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1285: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1286:
1287: fmov.l &0x0,%fpcr # zero FPCR
1288:
1289: #
1290: # copy, convert, and tag input argument
1291: #
1292: fmov.s 0x8(%a6),%fp0 # load sgl input
1293: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1294: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1295: bsr.l tag # fetch operand type
1296: mov.b %d0,STAG(%a6) # record tag in STAG
1297: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1298:
1299: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1300:
1301: clr.l %d0 # clear d0 before byte load
1302: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1303:
1304: tst.b %d1 # tag zero (NORM)?
1305: bne.b _L4_2s # no
1306: bsr.l setoxm1 # operand is a NORM
1307: bra.b _L4_6s
1308: _L4_2s:
1309: cmpi.b %d1,&ZERO # is operand a ZERO?
1310: bne.b _L4_3s # no
1311: bsr.l src_zero # yes
1312: bra.b _L4_6s
1313: _L4_3s:
1314: cmpi.b %d1,&INF # is operand an INF?
1315: bne.b _L4_4s # no
1316: bsr.l setoxm1i # yes
1317: bra.b _L4_6s
1318: _L4_4s:
1319: cmpi.b %d1,&QNAN # is operand a QNAN?
1320: bne.b _L4_5s # no
1321: bsr.l src_qnan # yes
1322: bra.b _L4_6s
1323: _L4_5s:
1324: bsr.l setoxm1d # operand is a DENORM (or any other tag)
1325: _L4_6s:
1326:
1327: #
1328: # Result is now in FP0
1329: #
1330: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1331: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1332: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1333: unlk %a6
1334: rts
1335:
# _fetoxm1d_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> setoxm1, ZERO -> src_zero,
# INF -> setoxm1i, QNAN -> src_qnan, any other tag -> setoxm1d.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FETOXM1 (e^x - 1), judging by
# the name only -- the arithmetic is in setoxm1/setoxm1d, not shown here.
1336: global _fetoxm1d_
1337: _fetoxm1d_:
1338: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1339:
1340: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1341: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1342: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1343:
1344: fmov.l &0x0,%fpcr # zero FPCR
1345:
1346: #
1347: # copy, convert, and tag input argument
1348: #
1349: fmov.d 0x8(%a6),%fp0 # load dbl input
1350: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1351: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1352: bsr.l tag # fetch operand type
1353: mov.b %d0,STAG(%a6) # record tag in STAG
1354: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1355:
1356: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1357:
1358: clr.l %d0 # clear d0 before byte load
1359: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1360:
1361: # STAG already holds this tag (stored right after the tag call); not written again
1362: tst.b %d1 # tag zero (NORM)?
1363: bne.b _L4_2d # no
1364: bsr.l setoxm1 # operand is a NORM
1365: bra.b _L4_6d
1366: _L4_2d:
1367: cmpi.b %d1,&ZERO # is operand a ZERO?
1368: bne.b _L4_3d # no
1369: bsr.l src_zero # yes
1370: bra.b _L4_6d
1371: _L4_3d:
1372: cmpi.b %d1,&INF # is operand an INF?
1373: bne.b _L4_4d # no
1374: bsr.l setoxm1i # yes
1375: bra.b _L4_6d
1376: _L4_4d:
1377: cmpi.b %d1,&QNAN # is operand a QNAN?
1378: bne.b _L4_5d # no
1379: bsr.l src_qnan # yes
1380: bra.b _L4_6d
1381: _L4_5d:
1382: bsr.l setoxm1d # operand is a DENORM (or any other tag)
1383: _L4_6d:
1384:
1385: #
1386: # Result is now in FP0
1387: #
1388: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1389: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1390: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1391: unlk %a6
1392: rts
1393:
# _fetoxm1x_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> setoxm1,
# ZERO -> src_zero, INF -> setoxm1i, QNAN -> src_qnan, any other tag ->
# setoxm1d.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FETOXM1 (e^x - 1), judging by
# the name only -- the arithmetic is in setoxm1/setoxm1d, not shown here.
1394: global _fetoxm1x_
1395: _fetoxm1x_:
1396: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1397:
1398: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1399: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1400: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1401:
1402: fmov.l &0x0,%fpcr # zero FPCR
1403:
1404: #
1405: # copy, convert, and tag input argument
1406: #
1407: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1408: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1409: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
1410: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
1411: bsr.l tag # fetch operand type
1412: mov.b %d0,STAG(%a6) # record tag in STAG
1413: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1414:
1415: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1416:
1417: clr.l %d0 # clear d0 before byte load
1418: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1419:
1420: tst.b %d1 # tag zero (NORM)?
1421: bne.b _L4_2x # no
1422: bsr.l setoxm1 # operand is a NORM
1423: bra.b _L4_6x
1424: _L4_2x:
1425: cmpi.b %d1,&ZERO # is operand a ZERO?
1426: bne.b _L4_3x # no
1427: bsr.l src_zero # yes
1428: bra.b _L4_6x
1429: _L4_3x:
1430: cmpi.b %d1,&INF # is operand an INF?
1431: bne.b _L4_4x # no
1432: bsr.l setoxm1i # yes
1433: bra.b _L4_6x
1434: _L4_4x:
1435: cmpi.b %d1,&QNAN # is operand a QNAN?
1436: bne.b _L4_5x # no
1437: bsr.l src_qnan # yes
1438: bra.b _L4_6x
1439: _L4_5x:
1440: bsr.l setoxm1d # operand is a DENORM (or any other tag)
1441: _L4_6x:
1442:
1443: #
1444: # Result is now in FP0
1445: #
1446: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1447: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1448: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1449: unlk %a6
1450: rts
1451:
1452:
1453: #########################################################################
1454: # MONADIC TEMPLATE #
1455: #########################################################################
# _ftanhs_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> stanh, ZERO -> src_zero,
# INF -> src_one, QNAN -> src_qnan, any other tag -> stanhd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTANH (hyperbolic tangent),
# judging by the name only -- the arithmetic is in stanh/stanhd, not shown.
1456: global _ftanhs_
1457: _ftanhs_:
1458: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1459:
1460: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1461: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1462: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1463:
1464: fmov.l &0x0,%fpcr # zero FPCR
1465:
1466: #
1467: # copy, convert, and tag input argument
1468: #
1469: fmov.s 0x8(%a6),%fp0 # load sgl input
1470: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1471: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1472: bsr.l tag # fetch operand type
1473: mov.b %d0,STAG(%a6) # record tag in STAG
1474: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1475:
1476: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1477:
1478: clr.l %d0 # clear d0 before byte load
1479: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1480:
1481: tst.b %d1 # tag zero (NORM)?
1482: bne.b _L5_2s # no
1483: bsr.l stanh # operand is a NORM
1484: bra.b _L5_6s
1485: _L5_2s:
1486: cmpi.b %d1,&ZERO # is operand a ZERO?
1487: bne.b _L5_3s # no
1488: bsr.l src_zero # yes
1489: bra.b _L5_6s
1490: _L5_3s:
1491: cmpi.b %d1,&INF # is operand an INF?
1492: bne.b _L5_4s # no
1493: bsr.l src_one # yes
1494: bra.b _L5_6s
1495: _L5_4s:
1496: cmpi.b %d1,&QNAN # is operand a QNAN?
1497: bne.b _L5_5s # no
1498: bsr.l src_qnan # yes
1499: bra.b _L5_6s
1500: _L5_5s:
1501: bsr.l stanhd # operand is a DENORM (or any other tag)
1502: _L5_6s:
1503:
1504: #
1505: # Result is now in FP0
1506: #
1507: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1508: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1509: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1510: unlk %a6
1511: rts
1512:
# _ftanhd_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> stanh, ZERO -> src_zero,
# INF -> src_one, QNAN -> src_qnan, any other tag -> stanhd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTANH (hyperbolic tangent),
# judging by the name only -- the arithmetic is in stanh/stanhd, not shown.
1513: global _ftanhd_
1514: _ftanhd_:
1515: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1516:
1517: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1518: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1519: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1520:
1521: fmov.l &0x0,%fpcr # zero FPCR
1522:
1523: #
1524: # copy, convert, and tag input argument
1525: #
1526: fmov.d 0x8(%a6),%fp0 # load dbl input
1527: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1528: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1529: bsr.l tag # fetch operand type
1530: mov.b %d0,STAG(%a6) # record tag in STAG
1531: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1532:
1533: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1534:
1535: clr.l %d0 # clear d0 before byte load
1536: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1537:
1538: # STAG already holds this tag (stored right after the tag call); not written again
1539: tst.b %d1 # tag zero (NORM)?
1540: bne.b _L5_2d # no
1541: bsr.l stanh # operand is a NORM
1542: bra.b _L5_6d
1543: _L5_2d:
1544: cmpi.b %d1,&ZERO # is operand a ZERO?
1545: bne.b _L5_3d # no
1546: bsr.l src_zero # yes
1547: bra.b _L5_6d
1548: _L5_3d:
1549: cmpi.b %d1,&INF # is operand an INF?
1550: bne.b _L5_4d # no
1551: bsr.l src_one # yes
1552: bra.b _L5_6d
1553: _L5_4d:
1554: cmpi.b %d1,&QNAN # is operand a QNAN?
1555: bne.b _L5_5d # no
1556: bsr.l src_qnan # yes
1557: bra.b _L5_6d
1558: _L5_5d:
1559: bsr.l stanhd # operand is a DENORM (or any other tag)
1560: _L5_6d:
1561:
1562: #
1563: # Result is now in FP0
1564: #
1565: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1566: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1567: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1568: unlk %a6
1569: rts
1570:
# _ftanhx_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> stanh,
# ZERO -> src_zero, INF -> src_one, QNAN -> src_qnan, any other tag ->
# stanhd.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTANH (hyperbolic tangent),
# judging by the name only -- the arithmetic is in stanh/stanhd, not shown.
1571: global _ftanhx_
1572: _ftanhx_:
1573: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1574:
1575: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1576: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1577: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1578:
1579: fmov.l &0x0,%fpcr # zero FPCR
1580:
1581: #
1582: # copy, convert, and tag input argument
1583: #
1584: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1585: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1586: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
1587: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
1588: bsr.l tag # fetch operand type
1589: mov.b %d0,STAG(%a6) # record tag in STAG
1590: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1591:
1592: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1593:
1594: clr.l %d0 # clear d0 before byte load
1595: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1596:
1597: tst.b %d1 # tag zero (NORM)?
1598: bne.b _L5_2x # no
1599: bsr.l stanh # operand is a NORM
1600: bra.b _L5_6x
1601: _L5_2x:
1602: cmpi.b %d1,&ZERO # is operand a ZERO?
1603: bne.b _L5_3x # no
1604: bsr.l src_zero # yes
1605: bra.b _L5_6x
1606: _L5_3x:
1607: cmpi.b %d1,&INF # is operand an INF?
1608: bne.b _L5_4x # no
1609: bsr.l src_one # yes
1610: bra.b _L5_6x
1611: _L5_4x:
1612: cmpi.b %d1,&QNAN # is operand a QNAN?
1613: bne.b _L5_5x # no
1614: bsr.l src_qnan # yes
1615: bra.b _L5_6x
1616: _L5_5x:
1617: bsr.l stanhd # operand is a DENORM (or any other tag)
1618: _L5_6x:
1619:
1620: #
1621: # Result is now in FP0
1622: #
1623: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1624: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1625: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1626: unlk %a6
1627: rts
1628:
1629:
1630: #########################################################################
1631: # MONADIC TEMPLATE #
1632: #########################################################################
# _fatans_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> satan, ZERO -> src_zero,
# INF -> spi_2, QNAN -> src_qnan, any other tag -> satand.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATAN (arc tangent), judging
# by the name only -- the arithmetic is in satan/satand, not shown here.
1633: global _fatans_
1634: _fatans_:
1635: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1636:
1637: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1638: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1639: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1640:
1641: fmov.l &0x0,%fpcr # zero FPCR
1642:
1643: #
1644: # copy, convert, and tag input argument
1645: #
1646: fmov.s 0x8(%a6),%fp0 # load sgl input
1647: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1648: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1649: bsr.l tag # fetch operand type
1650: mov.b %d0,STAG(%a6) # record tag in STAG
1651: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1652:
1653: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1654:
1655: clr.l %d0 # clear d0 before byte load
1656: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1657:
1658: tst.b %d1 # tag zero (NORM)?
1659: bne.b _L6_2s # no
1660: bsr.l satan # operand is a NORM
1661: bra.b _L6_6s
1662: _L6_2s:
1663: cmpi.b %d1,&ZERO # is operand a ZERO?
1664: bne.b _L6_3s # no
1665: bsr.l src_zero # yes
1666: bra.b _L6_6s
1667: _L6_3s:
1668: cmpi.b %d1,&INF # is operand an INF?
1669: bne.b _L6_4s # no
1670: bsr.l spi_2 # yes
1671: bra.b _L6_6s
1672: _L6_4s:
1673: cmpi.b %d1,&QNAN # is operand a QNAN?
1674: bne.b _L6_5s # no
1675: bsr.l src_qnan # yes
1676: bra.b _L6_6s
1677: _L6_5s:
1678: bsr.l satand # operand is a DENORM (or any other tag)
1679: _L6_6s:
1680:
1681: #
1682: # Result is now in FP0
1683: #
1684: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1685: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1686: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1687: unlk %a6
1688: rts
1689:
# _fatand_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> satan, ZERO -> src_zero,
# INF -> spi_2, QNAN -> src_qnan, any other tag -> satand.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATAN (arc tangent), judging
# by the name only -- the arithmetic is in satan/satand, not shown here.
1690: global _fatand_
1691: _fatand_:
1692: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1693:
1694: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1695: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1696: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1697:
1698: fmov.l &0x0,%fpcr # zero FPCR
1699:
1700: #
1701: # copy, convert, and tag input argument
1702: #
1703: fmov.d 0x8(%a6),%fp0 # load dbl input
1704: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1705: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1706: bsr.l tag # fetch operand type
1707: mov.b %d0,STAG(%a6) # record tag in STAG
1708: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1709:
1710: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1711:
1712: clr.l %d0 # clear d0 before byte load
1713: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1714:
1715: # STAG already holds this tag (stored right after the tag call); not written again
1716: tst.b %d1 # tag zero (NORM)?
1717: bne.b _L6_2d # no
1718: bsr.l satan # operand is a NORM
1719: bra.b _L6_6d
1720: _L6_2d:
1721: cmpi.b %d1,&ZERO # is operand a ZERO?
1722: bne.b _L6_3d # no
1723: bsr.l src_zero # yes
1724: bra.b _L6_6d
1725: _L6_3d:
1726: cmpi.b %d1,&INF # is operand an INF?
1727: bne.b _L6_4d # no
1728: bsr.l spi_2 # yes
1729: bra.b _L6_6d
1730: _L6_4d:
1731: cmpi.b %d1,&QNAN # is operand a QNAN?
1732: bne.b _L6_5d # no
1733: bsr.l src_qnan # yes
1734: bra.b _L6_6d
1735: _L6_5d:
1736: bsr.l satand # operand is a DENORM (or any other tag)
1737: _L6_6d:
1738:
1739: #
1740: # Result is now in FP0
1741: #
1742: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1743: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1744: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1745: unlk %a6
1746: rts
1747:
# _fatanx_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> satan,
# ZERO -> src_zero, INF -> spi_2, QNAN -> src_qnan, any other tag ->
# satand.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATAN (arc tangent), judging
# by the name only -- the arithmetic is in satan/satand, not shown here.
1748: global _fatanx_
1749: _fatanx_:
1750: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1751:
1752: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1753: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1754: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1755:
1756: fmov.l &0x0,%fpcr # zero FPCR
1757:
1758: #
1759: # copy, convert, and tag input argument
1760: #
1761: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1762: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1763: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
1764: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
1765: bsr.l tag # fetch operand type
1766: mov.b %d0,STAG(%a6) # record tag in STAG
1767: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1768:
1769: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1770:
1771: clr.l %d0 # clear d0 before byte load
1772: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1773:
1774: tst.b %d1 # tag zero (NORM)?
1775: bne.b _L6_2x # no
1776: bsr.l satan # operand is a NORM
1777: bra.b _L6_6x
1778: _L6_2x:
1779: cmpi.b %d1,&ZERO # is operand a ZERO?
1780: bne.b _L6_3x # no
1781: bsr.l src_zero # yes
1782: bra.b _L6_6x
1783: _L6_3x:
1784: cmpi.b %d1,&INF # is operand an INF?
1785: bne.b _L6_4x # no
1786: bsr.l spi_2 # yes
1787: bra.b _L6_6x
1788: _L6_4x:
1789: cmpi.b %d1,&QNAN # is operand a QNAN?
1790: bne.b _L6_5x # no
1791: bsr.l src_qnan # yes
1792: bra.b _L6_6x
1793: _L6_5x:
1794: bsr.l satand # operand is a DENORM (or any other tag)
1795: _L6_6x:
1796:
1797: #
1798: # Result is now in FP0
1799: #
1800: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1801: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1802: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1803: unlk %a6
1804: rts
1805:
1806:
1807: #########################################################################
1808: # MONADIC TEMPLATE #
1809: #########################################################################
# _fasins_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> sasin, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> sasind.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FASIN (arc sine), judging by
# the name only -- the arithmetic is in sasin/sasind, not shown here.
1810: global _fasins_
1811: _fasins_:
1812: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1813:
1814: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1815: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1816: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1817:
1818: fmov.l &0x0,%fpcr # zero FPCR
1819:
1820: #
1821: # copy, convert, and tag input argument
1822: #
1823: fmov.s 0x8(%a6),%fp0 # load sgl input
1824: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1825: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1826: bsr.l tag # fetch operand type
1827: mov.b %d0,STAG(%a6) # record tag in STAG
1828: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1829:
1830: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1831:
1832: clr.l %d0 # clear d0 before byte load
1833: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1834:
1835: tst.b %d1 # tag zero (NORM)?
1836: bne.b _L7_2s # no
1837: bsr.l sasin # operand is a NORM
1838: bra.b _L7_6s
1839: _L7_2s:
1840: cmpi.b %d1,&ZERO # is operand a ZERO?
1841: bne.b _L7_3s # no
1842: bsr.l src_zero # yes
1843: bra.b _L7_6s
1844: _L7_3s:
1845: cmpi.b %d1,&INF # is operand an INF?
1846: bne.b _L7_4s # no
1847: bsr.l t_operr # yes
1848: bra.b _L7_6s
1849: _L7_4s:
1850: cmpi.b %d1,&QNAN # is operand a QNAN?
1851: bne.b _L7_5s # no
1852: bsr.l src_qnan # yes
1853: bra.b _L7_6s
1854: _L7_5s:
1855: bsr.l sasind # operand is a DENORM (or any other tag)
1856: _L7_6s:
1857:
1858: #
1859: # Result is now in FP0
1860: #
1861: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1862: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1863: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1864: unlk %a6
1865: rts
1866:
# _fasind_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> sasin, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> sasind.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FASIN (arc sine), judging by
# the name only -- the arithmetic is in sasin/sasind, not shown here.
1867: global _fasind_
1868: _fasind_:
1869: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1870:
1871: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1872: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1873: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1874:
1875: fmov.l &0x0,%fpcr # zero FPCR
1876:
1877: #
1878: # copy, convert, and tag input argument
1879: #
1880: fmov.d 0x8(%a6),%fp0 # load dbl input
1881: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
1882: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1883: bsr.l tag # fetch operand type
1884: mov.b %d0,STAG(%a6) # record tag in STAG
1885: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1886:
1887: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1888:
1889: clr.l %d0 # clear d0 before byte load
1890: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1891:
1892: # STAG already holds this tag (stored right after the tag call); not written again
1893: tst.b %d1 # tag zero (NORM)?
1894: bne.b _L7_2d # no
1895: bsr.l sasin # operand is a NORM
1896: bra.b _L7_6d
1897: _L7_2d:
1898: cmpi.b %d1,&ZERO # is operand a ZERO?
1899: bne.b _L7_3d # no
1900: bsr.l src_zero # yes
1901: bra.b _L7_6d
1902: _L7_3d:
1903: cmpi.b %d1,&INF # is operand an INF?
1904: bne.b _L7_4d # no
1905: bsr.l t_operr # yes
1906: bra.b _L7_6d
1907: _L7_4d:
1908: cmpi.b %d1,&QNAN # is operand a QNAN?
1909: bne.b _L7_5d # no
1910: bsr.l src_qnan # yes
1911: bra.b _L7_6d
1912: _L7_5d:
1913: bsr.l sasind # operand is a DENORM (or any other tag)
1914: _L7_6d:
1915:
1916: #
1917: # Result is now in FP0
1918: #
1919: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1920: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1921: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1922: unlk %a6
1923: rts
1924:
# _fasinx_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> sasin,
# ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan, any other tag ->
# sasind.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FASIN (arc sine), judging by
# the name only -- the arithmetic is in sasin/sasind, not shown here.
1925: global _fasinx_
1926: _fasinx_:
1927: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1928:
1929: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1930: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1931: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1932:
1933: fmov.l &0x0,%fpcr # zero FPCR
1934:
1935: #
1936: # copy, convert, and tag input argument
1937: #
1938: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
1939: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1940: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
1941: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
1942: bsr.l tag # fetch operand type
1943: mov.b %d0,STAG(%a6) # record tag in STAG
1944: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
1945:
1946: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
1947:
1948: clr.l %d0 # clear d0 before byte load
1949: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1950:
1951: tst.b %d1 # tag zero (NORM)?
1952: bne.b _L7_2x # no
1953: bsr.l sasin # operand is a NORM
1954: bra.b _L7_6x
1955: _L7_2x:
1956: cmpi.b %d1,&ZERO # is operand a ZERO?
1957: bne.b _L7_3x # no
1958: bsr.l src_zero # yes
1959: bra.b _L7_6x
1960: _L7_3x:
1961: cmpi.b %d1,&INF # is operand an INF?
1962: bne.b _L7_4x # no
1963: bsr.l t_operr # yes
1964: bra.b _L7_6x
1965: _L7_4x:
1966: cmpi.b %d1,&QNAN # is operand a QNAN?
1967: bne.b _L7_5x # no
1968: bsr.l src_qnan # yes
1969: bra.b _L7_6x
1970: _L7_5x:
1971: bsr.l sasind # operand is a DENORM (or any other tag)
1972: _L7_6x:
1973:
1974: #
1975: # Result is now in FP0
1976: #
1977: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1978: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1979: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
1980: unlk %a6
1981: rts
1982:
1983:
1984: #########################################################################
1985: # MONADIC TEMPLATE #
1986: #########################################################################
# _fatanhs_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> satanh, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> satanhd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATANH (hyperbolic arc
# tangent), judging by the name only -- the arithmetic is in
# satanh/satanhd, not shown here.
1987: global _fatanhs_
1988: _fatanhs_:
1989: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
1990:
1991: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1992: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1993: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1994:
1995: fmov.l &0x0,%fpcr # zero FPCR
1996:
1997: #
1998: # copy, convert, and tag input argument
1999: #
2000: fmov.s 0x8(%a6),%fp0 # load sgl input
2001: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2002: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2003: bsr.l tag # fetch operand type
2004: mov.b %d0,STAG(%a6) # record tag in STAG
2005: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2006:
2007: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2008:
2009: clr.l %d0 # clear d0 before byte load
2010: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2011:
2012: tst.b %d1 # tag zero (NORM)?
2013: bne.b _L8_2s # no
2014: bsr.l satanh # operand is a NORM
2015: bra.b _L8_6s
2016: _L8_2s:
2017: cmpi.b %d1,&ZERO # is operand a ZERO?
2018: bne.b _L8_3s # no
2019: bsr.l src_zero # yes
2020: bra.b _L8_6s
2021: _L8_3s:
2022: cmpi.b %d1,&INF # is operand an INF?
2023: bne.b _L8_4s # no
2024: bsr.l t_operr # yes
2025: bra.b _L8_6s
2026: _L8_4s:
2027: cmpi.b %d1,&QNAN # is operand a QNAN?
2028: bne.b _L8_5s # no
2029: bsr.l src_qnan # yes
2030: bra.b _L8_6s
2031: _L8_5s:
2032: bsr.l satanhd # operand is a DENORM (or any other tag)
2033: _L8_6s:
2034:
2035: #
2036: # Result is now in FP0
2037: #
2038: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2039: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2040: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2041: unlk %a6
2042: rts
2043:
# _fatanhd_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> satanh, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> satanhd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATANH (hyperbolic arc
# tangent), judging by the name only -- the arithmetic is in
# satanh/satanhd, not shown here.
2044: global _fatanhd_
2045: _fatanhd_:
2046: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2047:
2048: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2049: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2050: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2051:
2052: fmov.l &0x0,%fpcr # zero FPCR
2053:
2054: #
2055: # copy, convert, and tag input argument
2056: #
2057: fmov.d 0x8(%a6),%fp0 # load dbl input
2058: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2059: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2060: bsr.l tag # fetch operand type
2061: mov.b %d0,STAG(%a6) # record tag in STAG
2062: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2063:
2064: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2065:
2066: clr.l %d0 # clear d0 before byte load
2067: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2068:
2069: # STAG already holds this tag (stored right after the tag call); not written again
2070: tst.b %d1 # tag zero (NORM)?
2071: bne.b _L8_2d # no
2072: bsr.l satanh # operand is a NORM
2073: bra.b _L8_6d
2074: _L8_2d:
2075: cmpi.b %d1,&ZERO # is operand a ZERO?
2076: bne.b _L8_3d # no
2077: bsr.l src_zero # yes
2078: bra.b _L8_6d
2079: _L8_3d:
2080: cmpi.b %d1,&INF # is operand an INF?
2081: bne.b _L8_4d # no
2082: bsr.l t_operr # yes
2083: bra.b _L8_6d
2084: _L8_4d:
2085: cmpi.b %d1,&QNAN # is operand a QNAN?
2086: bne.b _L8_5d # no
2087: bsr.l src_qnan # yes
2088: bra.b _L8_6d
2089: _L8_5d:
2090: bsr.l satanhd # operand is a DENORM (or any other tag)
2091: _L8_6d:
2092:
2093: #
2094: # Result is now in FP0
2095: #
2096: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2097: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2098: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2099: unlk %a6
2100: rts
2101:
# _fatanhx_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> satanh,
# ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan, any other tag ->
# satanhd.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FATANH (hyperbolic arc
# tangent), judging by the name only -- the arithmetic is in
# satanh/satanhd, not shown here.
2102: global _fatanhx_
2103: _fatanhx_:
2104: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2105:
2106: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2107: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2108: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2109:
2110: fmov.l &0x0,%fpcr # zero FPCR
2111:
2112: #
2113: # copy, convert, and tag input argument
2114: #
2115: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2116: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2117: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
2118: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
2119: bsr.l tag # fetch operand type
2120: mov.b %d0,STAG(%a6) # record tag in STAG
2121: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2122:
2123: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2124:
2125: clr.l %d0 # clear d0 before byte load
2126: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2127:
2128: tst.b %d1 # tag zero (NORM)?
2129: bne.b _L8_2x # no
2130: bsr.l satanh # operand is a NORM
2131: bra.b _L8_6x
2132: _L8_2x:
2133: cmpi.b %d1,&ZERO # is operand a ZERO?
2134: bne.b _L8_3x # no
2135: bsr.l src_zero # yes
2136: bra.b _L8_6x
2137: _L8_3x:
2138: cmpi.b %d1,&INF # is operand an INF?
2139: bne.b _L8_4x # no
2140: bsr.l t_operr # yes
2141: bra.b _L8_6x
2142: _L8_4x:
2143: cmpi.b %d1,&QNAN # is operand a QNAN?
2144: bne.b _L8_5x # no
2145: bsr.l src_qnan # yes
2146: bra.b _L8_6x
2147: _L8_5x:
2148: bsr.l satanhd # operand is a DENORM (or any other tag)
2149: _L8_6x:
2150:
2151: #
2152: # Result is now in FP0
2153: #
2154: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2155: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2156: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2157: unlk %a6
2158: rts
2159:
2160:
2161: #########################################################################
2162: # MONADIC TEMPLATE #
2163: #########################################################################
# _ftans_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> stan, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> stand.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTAN (tangent), judging by
# the name only -- the arithmetic is in stan/stand, not shown here.
2164: global _ftans_
2165: _ftans_:
2166: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2167:
2168: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2169: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2170: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2171:
2172: fmov.l &0x0,%fpcr # zero FPCR
2173:
2174: #
2175: # copy, convert, and tag input argument
2176: #
2177: fmov.s 0x8(%a6),%fp0 # load sgl input
2178: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2179: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2180: bsr.l tag # fetch operand type
2181: mov.b %d0,STAG(%a6) # record tag in STAG
2182: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2183:
2184: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2185:
2186: clr.l %d0 # clear d0 before byte load
2187: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2188:
2189: tst.b %d1 # tag zero (NORM)?
2190: bne.b _L9_2s # no
2191: bsr.l stan # operand is a NORM
2192: bra.b _L9_6s
2193: _L9_2s:
2194: cmpi.b %d1,&ZERO # is operand a ZERO?
2195: bne.b _L9_3s # no
2196: bsr.l src_zero # yes
2197: bra.b _L9_6s
2198: _L9_3s:
2199: cmpi.b %d1,&INF # is operand an INF?
2200: bne.b _L9_4s # no
2201: bsr.l t_operr # yes
2202: bra.b _L9_6s
2203: _L9_4s:
2204: cmpi.b %d1,&QNAN # is operand a QNAN?
2205: bne.b _L9_5s # no
2206: bsr.l src_qnan # yes
2207: bra.b _L9_6s
2208: _L9_5s:
2209: bsr.l stand # operand is a DENORM (or any other tag)
2210: _L9_6s:
2211:
2212: #
2213: # Result is now in FP0
2214: #
2215: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2216: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2217: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2218: unlk %a6
2219: rts
2220:
# _ftand_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> stan, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag -> stand.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTAN (tangent), judging by
# the name only -- the arithmetic is in stan/stand, not shown here.
2221: global _ftand_
2222: _ftand_:
2223: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2224:
2225: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2226: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2227: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2228:
2229: fmov.l &0x0,%fpcr # zero FPCR
2230:
2231: #
2232: # copy, convert, and tag input argument
2233: #
2234: fmov.d 0x8(%a6),%fp0 # load dbl input
2235: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2236: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2237: bsr.l tag # fetch operand type
2238: mov.b %d0,STAG(%a6) # record tag in STAG
2239: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2240:
2241: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2242:
2243: clr.l %d0 # clear d0 before byte load
2244: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2245:
2246: # STAG already holds this tag (stored right after the tag call); not written again
2247: tst.b %d1 # tag zero (NORM)?
2248: bne.b _L9_2d # no
2249: bsr.l stan # operand is a NORM
2250: bra.b _L9_6d
2251: _L9_2d:
2252: cmpi.b %d1,&ZERO # is operand a ZERO?
2253: bne.b _L9_3d # no
2254: bsr.l src_zero # yes
2255: bra.b _L9_6d
2256: _L9_3d:
2257: cmpi.b %d1,&INF # is operand an INF?
2258: bne.b _L9_4d # no
2259: bsr.l t_operr # yes
2260: bra.b _L9_6d
2261: _L9_4d:
2262: cmpi.b %d1,&QNAN # is operand a QNAN?
2263: bne.b _L9_5d # no
2264: bsr.l src_qnan # yes
2265: bra.b _L9_6d
2266: _L9_5d:
2267: bsr.l stand # operand is a DENORM (or any other tag)
2268: _L9_6d:
2269:
2270: #
2271: # Result is now in FP0
2272: #
2273: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2274: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2275: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2276: unlk %a6
2277: rts
2278:
# _ftanx_: entry point for an extended-precision operand; its 12 bytes
# are copied from 0x8(%a6) into FP_SRC with three longword moves.  The
# operand is classified by tag; bits 31-24 and 15-8 of the saved FPSR image
# are cleared (the condition-code and exception-status bytes in the
# documented FPSR layout); then the tag selects a handler: NORM -> stan,
# ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan, any other tag ->
# stand.  %d0 holds the zero-extended FPCR_MODE byte at the time of the
# handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FTAN (tangent), judging by
# the name only -- the arithmetic is in stan/stand, not shown here.
2279: global _ftanx_
2280: _ftanx_:
2281: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2282:
2283: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2284: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2285: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2286:
2287: fmov.l &0x0,%fpcr # zero FPCR
2288:
2289: #
2290: # copy, convert, and tag input argument
2291: #
2292: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2293: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2294: mov.l 0x8+0x4(%a6),0x4(%a0) # copy bytes 4-7
2295: mov.l 0x8+0x8(%a6),0x8(%a0) # copy bytes 8-11
2296: bsr.l tag # fetch operand type
2297: mov.b %d0,STAG(%a6) # record tag in STAG
2298: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2299:
2300: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2301:
2302: clr.l %d0 # clear d0 before byte load
2303: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2304:
2305: tst.b %d1 # tag zero (NORM)?
2306: bne.b _L9_2x # no
2307: bsr.l stan # operand is a NORM
2308: bra.b _L9_6x
2309: _L9_2x:
2310: cmpi.b %d1,&ZERO # is operand a ZERO?
2311: bne.b _L9_3x # no
2312: bsr.l src_zero # yes
2313: bra.b _L9_6x
2314: _L9_3x:
2315: cmpi.b %d1,&INF # is operand an INF?
2316: bne.b _L9_4x # no
2317: bsr.l t_operr # yes
2318: bra.b _L9_6x
2319: _L9_4x:
2320: cmpi.b %d1,&QNAN # is operand a QNAN?
2321: bne.b _L9_5x # no
2322: bsr.l src_qnan # yes
2323: bra.b _L9_6x
2324: _L9_5x:
2325: bsr.l stand # operand is a DENORM (or any other tag)
2326: _L9_6x:
2327:
2328: #
2329: # Result is now in FP0
2330: #
2331: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2332: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2333: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2334: unlk %a6
2335: rts
2336:
2337:
2338: #########################################################################
2339: # MONADIC TEMPLATE #
2340: #########################################################################
# _fetoxs_: entry point for a single-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> setox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other tag -> setoxd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FETOX (e^x), judging by the
# name only -- the arithmetic is in setox/setoxd, not shown here.
2341: global _fetoxs_
2342: _fetoxs_:
2343: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2344:
2345: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2346: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2347: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2348:
2349: fmov.l &0x0,%fpcr # zero FPCR
2350:
2351: #
2352: # copy, convert, and tag input argument
2353: #
2354: fmov.s 0x8(%a6),%fp0 # load sgl input
2355: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2356: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2357: bsr.l tag # fetch operand type
2358: mov.b %d0,STAG(%a6) # record tag in STAG
2359: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2360:
2361: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2362:
2363: clr.l %d0 # clear d0 before byte load
2364: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2365:
2366: tst.b %d1 # tag zero (NORM)?
2367: bne.b _L10_2s # no
2368: bsr.l setox # operand is a NORM
2369: bra.b _L10_6s
2370: _L10_2s:
2371: cmpi.b %d1,&ZERO # is operand a ZERO?
2372: bne.b _L10_3s # no
2373: bsr.l ld_pone # yes
2374: bra.b _L10_6s
2375: _L10_3s:
2376: cmpi.b %d1,&INF # is operand an INF?
2377: bne.b _L10_4s # no
2378: bsr.l szr_inf # yes
2379: bra.b _L10_6s
2380: _L10_4s:
2381: cmpi.b %d1,&QNAN # is operand a QNAN?
2382: bne.b _L10_5s # no
2383: bsr.l src_qnan # yes
2384: bra.b _L10_6s
2385: _L10_5s:
2386: bsr.l setoxd # operand is a DENORM (or any other tag)
2387: _L10_6s:
2388:
2389: #
2390: # Result is now in FP0
2391: #
2392: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2393: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2394: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2395: unlk %a6
2396: rts
2397:
# _fetoxd_: entry point for a double-precision operand read from
# 0x8(%a6).  The operand is stored in extended precision in FP_SRC and
# classified by tag; bits 31-24 and 15-8 of the saved FPSR image are cleared
# (the condition-code and exception-status bytes in the documented FPSR
# layout); then the tag selects a handler: NORM -> setox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other tag -> setoxd.  %d0 holds
# the zero-extended FPCR_MODE byte at the time of the handler call.
# On return the result is in %fp0; d0-d1/a0-a1, FPCR/FPSR and %fp1 are
# reloaded from the frame.
# NOTE(review): presumably the library form of FETOX (e^x), judging by the
# name only -- the arithmetic is in setox/setoxd, not shown here.
2398: global _fetoxd_
2399: _fetoxd_:
2400: link %a6,&-LOCAL_SIZE # set up frame of LOCAL_SIZE bytes
2401:
2402: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2403: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2404: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2405:
2406: fmov.l &0x0,%fpcr # zero FPCR
2407:
2408: #
2409: # copy, convert, and tag input argument
2410: #
2411: fmov.d 0x8(%a6),%fp0 # load dbl input
2412: fmov.x %fp0,FP_SRC(%a6) # store as extended in FP_SRC
2413: lea FP_SRC(%a6),%a0 # a0 = address of FP_SRC
2414: bsr.l tag # fetch operand type
2415: mov.b %d0,STAG(%a6) # record tag in STAG
2416: mov.b %d0,%d1 # keep tag in d1 (d0 reused below)
2417:
2418: andi.l &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24, 15-8 of saved FPSR
2419:
2420: clr.l %d0 # clear d0 before byte load
2421: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2422:
2423: # STAG already holds this tag (stored right after the tag call); not written again
2424: tst.b %d1 # tag zero (NORM)?
2425: bne.b _L10_2d # no
2426: bsr.l setox # operand is a NORM
2427: bra.b _L10_6d
2428: _L10_2d:
2429: cmpi.b %d1,&ZERO # is operand a ZERO?
2430: bne.b _L10_3d # no
2431: bsr.l ld_pone # yes
2432: bra.b _L10_6d
2433: _L10_3d:
2434: cmpi.b %d1,&INF # is operand an INF?
2435: bne.b _L10_4d # no
2436: bsr.l szr_inf # yes
2437: bra.b _L10_6d
2438: _L10_4d:
2439: cmpi.b %d1,&QNAN # is operand a QNAN?
2440: bne.b _L10_5d # no
2441: bsr.l src_qnan # yes
2442: bra.b _L10_6d
2443: _L10_5d:
2444: bsr.l setoxd # operand is a DENORM (or any other tag)
2445: _L10_6d:
2446:
2447: #
2448: # Result is now in FP0
2449: #
2450: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2451: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2452: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 keeps the result)
2453: unlk %a6
2454: rts
2455:
# _fetoxx_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> setox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> setoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2456: global _fetoxx_
2457: _fetoxx_:
2458: link %a6,&-LOCAL_SIZE
2459:
2460: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2461: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2462: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2463:
2464: fmov.l &0x0,%fpcr # zero FPCR
2465:
2466: #
2467: # copy, convert, and tag input argument
2468: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
2469: lea FP_SRC(%a6),%a0
2470: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2471: mov.l 0x8+0x4(%a6),0x4(%a0)
2472: mov.l 0x8+0x8(%a6),0x8(%a0)
2473: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2474: mov.b %d0,STAG(%a6)
2475: mov.b %d0,%d1
2476:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2477: andi.l &0x00ff00ff,USER_FPSR(%a6)
2478:
2479: clr.l %d0
2480: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2481:
# Dispatch on the operand type held in d1.
2482: tst.b %d1
2483: bne.b _L10_2x
2484: bsr.l setox # operand is a NORM
2485: bra.b _L10_6x
2486: _L10_2x:
2487: cmpi.b %d1,&ZERO # is operand a ZERO?
2488: bne.b _L10_3x # no
2489: bsr.l ld_pone # yes
2490: bra.b _L10_6x
2491: _L10_3x:
2492: cmpi.b %d1,&INF # is operand an INF?
2493: bne.b _L10_4x # no
2494: bsr.l szr_inf # yes
2495: bra.b _L10_6x
2496: _L10_4x:
2497: cmpi.b %d1,&QNAN # is operand a QNAN?
2498: bne.b _L10_5x # no
2499: bsr.l src_qnan # yes
2500: bra.b _L10_6x
# No type matched above: fall through to the DENORM handler.
2501: _L10_5x:
2502: bsr.l setoxd # operand is a DENORM
2503: _L10_6x:
2504:
2505: #
2506: # Result is now in FP0
2507: #
2508: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2509: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2510: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2511: unlk %a6
2512: rts
2513:
2514:
2515: #########################################################################
2516: # MONADIC TEMPLATE #
2517: #########################################################################
# _ftwotoxs_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> stwotox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stwotoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2518: global _ftwotoxs_
2519: _ftwotoxs_:
2520: link %a6,&-LOCAL_SIZE
2521:
2522: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2523: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2524: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2525:
2526: fmov.l &0x0,%fpcr # zero FPCR
2527:
2528: #
2529: # copy, convert, and tag input argument
2530: #
2531: fmov.s 0x8(%a6),%fp0 # load sgl input
2532: fmov.x %fp0,FP_SRC(%a6)
2533: lea FP_SRC(%a6),%a0
2534: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2535: mov.b %d0,STAG(%a6)
2536: mov.b %d0,%d1
2537:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2538: andi.l &0x00ff00ff,USER_FPSR(%a6)
2539:
2540: clr.l %d0
2541: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2542:
# Dispatch on the operand type held in d1.
2543: tst.b %d1
2544: bne.b _L11_2s
2545: bsr.l stwotox # operand is a NORM
2546: bra.b _L11_6s
2547: _L11_2s:
2548: cmpi.b %d1,&ZERO # is operand a ZERO?
2549: bne.b _L11_3s # no
2550: bsr.l ld_pone # yes
2551: bra.b _L11_6s
2552: _L11_3s:
2553: cmpi.b %d1,&INF # is operand an INF?
2554: bne.b _L11_4s # no
2555: bsr.l szr_inf # yes
2556: bra.b _L11_6s
2557: _L11_4s:
2558: cmpi.b %d1,&QNAN # is operand a QNAN?
2559: bne.b _L11_5s # no
2560: bsr.l src_qnan # yes
2561: bra.b _L11_6s
# No type matched above: fall through to the DENORM handler.
2562: _L11_5s:
2563: bsr.l stwotoxd # operand is a DENORM
2564: _L11_6s:
2565:
2566: #
2567: # Result is now in FP0
2568: #
2569: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2570: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2571: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2572: unlk %a6
2573: rts
2574:
# _ftwotoxd_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> stwotox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stwotoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2575: global _ftwotoxd_
2576: _ftwotoxd_:
2577: link %a6,&-LOCAL_SIZE
2578:
2579: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2580: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2581: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2582:
2583: fmov.l &0x0,%fpcr # zero FPCR
2584:
2585: #
2586: # copy, convert, and tag input argument
2587: #
2588: fmov.d 0x8(%a6),%fp0 # load dbl input
2589: fmov.x %fp0,FP_SRC(%a6)
2590: lea FP_SRC(%a6),%a0
2591: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2592: mov.b %d0,STAG(%a6)
2593: mov.b %d0,%d1
2594:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2595: andi.l &0x00ff00ff,USER_FPSR(%a6)
2596:
2597: clr.l %d0
2598: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2599:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
2600: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
2601: tst.b %d1
2602: bne.b _L11_2d
2603: bsr.l stwotox # operand is a NORM
2604: bra.b _L11_6d
2605: _L11_2d:
2606: cmpi.b %d1,&ZERO # is operand a ZERO?
2607: bne.b _L11_3d # no
2608: bsr.l ld_pone # yes
2609: bra.b _L11_6d
2610: _L11_3d:
2611: cmpi.b %d1,&INF # is operand an INF?
2612: bne.b _L11_4d # no
2613: bsr.l szr_inf # yes
2614: bra.b _L11_6d
2615: _L11_4d:
2616: cmpi.b %d1,&QNAN # is operand a QNAN?
2617: bne.b _L11_5d # no
2618: bsr.l src_qnan # yes
2619: bra.b _L11_6d
# No type matched above: fall through to the DENORM handler.
2620: _L11_5d:
2621: bsr.l stwotoxd # operand is a DENORM
2622: _L11_6d:
2623:
2624: #
2625: # Result is now in FP0
2626: #
2627: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2628: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2629: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2630: unlk %a6
2631: rts
2632:
# _ftwotoxx_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> stwotox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stwotoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2633: global _ftwotoxx_
2634: _ftwotoxx_:
2635: link %a6,&-LOCAL_SIZE
2636:
2637: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2638: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2639: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2640:
2641: fmov.l &0x0,%fpcr # zero FPCR
2642:
2643: #
2644: # copy, convert, and tag input argument
2645: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
2646: lea FP_SRC(%a6),%a0
2647: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2648: mov.l 0x8+0x4(%a6),0x4(%a0)
2649: mov.l 0x8+0x8(%a6),0x8(%a0)
2650: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2651: mov.b %d0,STAG(%a6)
2652: mov.b %d0,%d1
2653:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2654: andi.l &0x00ff00ff,USER_FPSR(%a6)
2655:
2656: clr.l %d0
2657: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2658:
# Dispatch on the operand type held in d1.
2659: tst.b %d1
2660: bne.b _L11_2x
2661: bsr.l stwotox # operand is a NORM
2662: bra.b _L11_6x
2663: _L11_2x:
2664: cmpi.b %d1,&ZERO # is operand a ZERO?
2665: bne.b _L11_3x # no
2666: bsr.l ld_pone # yes
2667: bra.b _L11_6x
2668: _L11_3x:
2669: cmpi.b %d1,&INF # is operand an INF?
2670: bne.b _L11_4x # no
2671: bsr.l szr_inf # yes
2672: bra.b _L11_6x
2673: _L11_4x:
2674: cmpi.b %d1,&QNAN # is operand a QNAN?
2675: bne.b _L11_5x # no
2676: bsr.l src_qnan # yes
2677: bra.b _L11_6x
# No type matched above: fall through to the DENORM handler.
2678: _L11_5x:
2679: bsr.l stwotoxd # operand is a DENORM
2680: _L11_6x:
2681:
2682: #
2683: # Result is now in FP0
2684: #
2685: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2686: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2687: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2688: unlk %a6
2689: rts
2690:
2691:
2692: #########################################################################
2693: # MONADIC TEMPLATE #
2694: #########################################################################
# _ftentoxs_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> stentox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stentoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2695: global _ftentoxs_
2696: _ftentoxs_:
2697: link %a6,&-LOCAL_SIZE
2698:
2699: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2700: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2701: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2702:
2703: fmov.l &0x0,%fpcr # zero FPCR
2704:
2705: #
2706: # copy, convert, and tag input argument
2707: #
2708: fmov.s 0x8(%a6),%fp0 # load sgl input
2709: fmov.x %fp0,FP_SRC(%a6)
2710: lea FP_SRC(%a6),%a0
2711: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2712: mov.b %d0,STAG(%a6)
2713: mov.b %d0,%d1
2714:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2715: andi.l &0x00ff00ff,USER_FPSR(%a6)
2716:
2717: clr.l %d0
2718: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2719:
# Dispatch on the operand type held in d1.
2720: tst.b %d1
2721: bne.b _L12_2s
2722: bsr.l stentox # operand is a NORM
2723: bra.b _L12_6s
2724: _L12_2s:
2725: cmpi.b %d1,&ZERO # is operand a ZERO?
2726: bne.b _L12_3s # no
2727: bsr.l ld_pone # yes
2728: bra.b _L12_6s
2729: _L12_3s:
2730: cmpi.b %d1,&INF # is operand an INF?
2731: bne.b _L12_4s # no
2732: bsr.l szr_inf # yes
2733: bra.b _L12_6s
2734: _L12_4s:
2735: cmpi.b %d1,&QNAN # is operand a QNAN?
2736: bne.b _L12_5s # no
2737: bsr.l src_qnan # yes
2738: bra.b _L12_6s
# No type matched above: fall through to the DENORM handler.
2739: _L12_5s:
2740: bsr.l stentoxd # operand is a DENORM
2741: _L12_6s:
2742:
2743: #
2744: # Result is now in FP0
2745: #
2746: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2747: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2748: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2749: unlk %a6
2750: rts
2751:
# _ftentoxd_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> stentox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stentoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2752: global _ftentoxd_
2753: _ftentoxd_:
2754: link %a6,&-LOCAL_SIZE
2755:
2756: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2757: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2758: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2759:
2760: fmov.l &0x0,%fpcr # zero FPCR
2761:
2762: #
2763: # copy, convert, and tag input argument
2764: #
2765: fmov.d 0x8(%a6),%fp0 # load dbl input
2766: fmov.x %fp0,FP_SRC(%a6)
2767: lea FP_SRC(%a6),%a0
2768: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2769: mov.b %d0,STAG(%a6)
2770: mov.b %d0,%d1
2771:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2772: andi.l &0x00ff00ff,USER_FPSR(%a6)
2773:
2774: clr.l %d0
2775: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2776:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
2777: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
2778: tst.b %d1
2779: bne.b _L12_2d
2780: bsr.l stentox # operand is a NORM
2781: bra.b _L12_6d
2782: _L12_2d:
2783: cmpi.b %d1,&ZERO # is operand a ZERO?
2784: bne.b _L12_3d # no
2785: bsr.l ld_pone # yes
2786: bra.b _L12_6d
2787: _L12_3d:
2788: cmpi.b %d1,&INF # is operand an INF?
2789: bne.b _L12_4d # no
2790: bsr.l szr_inf # yes
2791: bra.b _L12_6d
2792: _L12_4d:
2793: cmpi.b %d1,&QNAN # is operand a QNAN?
2794: bne.b _L12_5d # no
2795: bsr.l src_qnan # yes
2796: bra.b _L12_6d
# No type matched above: fall through to the DENORM handler.
2797: _L12_5d:
2798: bsr.l stentoxd # operand is a DENORM
2799: _L12_6d:
2800:
2801: #
2802: # Result is now in FP0
2803: #
2804: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2805: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2806: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2807: unlk %a6
2808: rts
2809:
# _ftentoxx_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> stentox, ZERO -> ld_pone,
# INF -> szr_inf, QNAN -> src_qnan, any other type -> stentoxd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2810: global _ftentoxx_
2811: _ftentoxx_:
2812: link %a6,&-LOCAL_SIZE
2813:
2814: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2815: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2816: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2817:
2818: fmov.l &0x0,%fpcr # zero FPCR
2819:
2820: #
2821: # copy, convert, and tag input argument
2822: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
2823: lea FP_SRC(%a6),%a0
2824: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2825: mov.l 0x8+0x4(%a6),0x4(%a0)
2826: mov.l 0x8+0x8(%a6),0x8(%a0)
2827: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2828: mov.b %d0,STAG(%a6)
2829: mov.b %d0,%d1
2830:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2831: andi.l &0x00ff00ff,USER_FPSR(%a6)
2832:
2833: clr.l %d0
2834: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2835:
# Dispatch on the operand type held in d1.
2836: tst.b %d1
2837: bne.b _L12_2x
2838: bsr.l stentox # operand is a NORM
2839: bra.b _L12_6x
2840: _L12_2x:
2841: cmpi.b %d1,&ZERO # is operand a ZERO?
2842: bne.b _L12_3x # no
2843: bsr.l ld_pone # yes
2844: bra.b _L12_6x
2845: _L12_3x:
2846: cmpi.b %d1,&INF # is operand an INF?
2847: bne.b _L12_4x # no
2848: bsr.l szr_inf # yes
2849: bra.b _L12_6x
2850: _L12_4x:
2851: cmpi.b %d1,&QNAN # is operand a QNAN?
2852: bne.b _L12_5x # no
2853: bsr.l src_qnan # yes
2854: bra.b _L12_6x
# No type matched above: fall through to the DENORM handler.
2855: _L12_5x:
2856: bsr.l stentoxd # operand is a DENORM
2857: _L12_6x:
2858:
2859: #
2860: # Result is now in FP0
2861: #
2862: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2863: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2864: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2865: unlk %a6
2866: rts
2867:
2868:
2869: #########################################################################
2870: # MONADIC TEMPLATE #
2871: #########################################################################
# _flogns_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slogn, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slognd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2872: global _flogns_
2873: _flogns_:
2874: link %a6,&-LOCAL_SIZE
2875:
2876: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2877: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2878: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2879:
2880: fmov.l &0x0,%fpcr # zero FPCR
2881:
2882: #
2883: # copy, convert, and tag input argument
2884: #
2885: fmov.s 0x8(%a6),%fp0 # load sgl input
2886: fmov.x %fp0,FP_SRC(%a6)
2887: lea FP_SRC(%a6),%a0
2888: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2889: mov.b %d0,STAG(%a6)
2890: mov.b %d0,%d1
2891:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2892: andi.l &0x00ff00ff,USER_FPSR(%a6)
2893:
2894: clr.l %d0
2895: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2896:
# Dispatch on the operand type held in d1.
2897: tst.b %d1
2898: bne.b _L13_2s
2899: bsr.l slogn # operand is a NORM
2900: bra.b _L13_6s
2901: _L13_2s:
2902: cmpi.b %d1,&ZERO # is operand a ZERO?
2903: bne.b _L13_3s # no
2904: bsr.l t_dz2 # yes
2905: bra.b _L13_6s
2906: _L13_3s:
2907: cmpi.b %d1,&INF # is operand an INF?
2908: bne.b _L13_4s # no
2909: bsr.l sopr_inf # yes
2910: bra.b _L13_6s
2911: _L13_4s:
2912: cmpi.b %d1,&QNAN # is operand a QNAN?
2913: bne.b _L13_5s # no
2914: bsr.l src_qnan # yes
2915: bra.b _L13_6s
# No type matched above: fall through to the DENORM handler.
2916: _L13_5s:
2917: bsr.l slognd # operand is a DENORM
2918: _L13_6s:
2919:
2920: #
2921: # Result is now in FP0
2922: #
2923: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2924: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2925: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2926: unlk %a6
2927: rts
2928:
# _flognd_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slogn, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slognd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2929: global _flognd_
2930: _flognd_:
2931: link %a6,&-LOCAL_SIZE
2932:
2933: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2934: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2935: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2936:
2937: fmov.l &0x0,%fpcr # zero FPCR
2938:
2939: #
2940: # copy, convert, and tag input argument
2941: #
2942: fmov.d 0x8(%a6),%fp0 # load dbl input
2943: fmov.x %fp0,FP_SRC(%a6)
2944: lea FP_SRC(%a6),%a0
2945: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
2946: mov.b %d0,STAG(%a6)
2947: mov.b %d0,%d1
2948:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
2949: andi.l &0x00ff00ff,USER_FPSR(%a6)
2950:
2951: clr.l %d0
2952: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2953:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
2954: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
2955: tst.b %d1
2956: bne.b _L13_2d
2957: bsr.l slogn # operand is a NORM
2958: bra.b _L13_6d
2959: _L13_2d:
2960: cmpi.b %d1,&ZERO # is operand a ZERO?
2961: bne.b _L13_3d # no
2962: bsr.l t_dz2 # yes
2963: bra.b _L13_6d
2964: _L13_3d:
2965: cmpi.b %d1,&INF # is operand an INF?
2966: bne.b _L13_4d # no
2967: bsr.l sopr_inf # yes
2968: bra.b _L13_6d
2969: _L13_4d:
2970: cmpi.b %d1,&QNAN # is operand a QNAN?
2971: bne.b _L13_5d # no
2972: bsr.l src_qnan # yes
2973: bra.b _L13_6d
# No type matched above: fall through to the DENORM handler.
2974: _L13_5d:
2975: bsr.l slognd # operand is a DENORM
2976: _L13_6d:
2977:
2978: #
2979: # Result is now in FP0
2980: #
2981: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2982: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
2983: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2984: unlk %a6
2985: rts
2986:
# _flognx_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> slogn, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slognd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
2987: global _flognx_
2988: _flognx_:
2989: link %a6,&-LOCAL_SIZE
2990:
2991: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2992: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2993: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2994:
2995: fmov.l &0x0,%fpcr # zero FPCR
2996:
2997: #
2998: # copy, convert, and tag input argument
2999: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
3000: lea FP_SRC(%a6),%a0
3001: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3002: mov.l 0x8+0x4(%a6),0x4(%a0)
3003: mov.l 0x8+0x8(%a6),0x8(%a0)
3004: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3005: mov.b %d0,STAG(%a6)
3006: mov.b %d0,%d1
3007:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3008: andi.l &0x00ff00ff,USER_FPSR(%a6)
3009:
3010: clr.l %d0
3011: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3012:
# Dispatch on the operand type held in d1.
3013: tst.b %d1
3014: bne.b _L13_2x
3015: bsr.l slogn # operand is a NORM
3016: bra.b _L13_6x
3017: _L13_2x:
3018: cmpi.b %d1,&ZERO # is operand a ZERO?
3019: bne.b _L13_3x # no
3020: bsr.l t_dz2 # yes
3021: bra.b _L13_6x
3022: _L13_3x:
3023: cmpi.b %d1,&INF # is operand an INF?
3024: bne.b _L13_4x # no
3025: bsr.l sopr_inf # yes
3026: bra.b _L13_6x
3027: _L13_4x:
3028: cmpi.b %d1,&QNAN # is operand a QNAN?
3029: bne.b _L13_5x # no
3030: bsr.l src_qnan # yes
3031: bra.b _L13_6x
# No type matched above: fall through to the DENORM handler.
3032: _L13_5x:
3033: bsr.l slognd # operand is a DENORM
3034: _L13_6x:
3035:
3036: #
3037: # Result is now in FP0
3038: #
3039: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3040: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3041: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3042: unlk %a6
3043: rts
3044:
3045:
3046: #########################################################################
3047: # MONADIC TEMPLATE #
3048: #########################################################################
# _flog10s_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slog10, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog10d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3049: global _flog10s_
3050: _flog10s_:
3051: link %a6,&-LOCAL_SIZE
3052:
3053: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3054: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3055: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3056:
3057: fmov.l &0x0,%fpcr # zero FPCR
3058:
3059: #
3060: # copy, convert, and tag input argument
3061: #
3062: fmov.s 0x8(%a6),%fp0 # load sgl input
3063: fmov.x %fp0,FP_SRC(%a6)
3064: lea FP_SRC(%a6),%a0
3065: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3066: mov.b %d0,STAG(%a6)
3067: mov.b %d0,%d1
3068:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3069: andi.l &0x00ff00ff,USER_FPSR(%a6)
3070:
3071: clr.l %d0
3072: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3073:
# Dispatch on the operand type held in d1.
3074: tst.b %d1
3075: bne.b _L14_2s
3076: bsr.l slog10 # operand is a NORM
3077: bra.b _L14_6s
3078: _L14_2s:
3079: cmpi.b %d1,&ZERO # is operand a ZERO?
3080: bne.b _L14_3s # no
3081: bsr.l t_dz2 # yes
3082: bra.b _L14_6s
3083: _L14_3s:
3084: cmpi.b %d1,&INF # is operand an INF?
3085: bne.b _L14_4s # no
3086: bsr.l sopr_inf # yes
3087: bra.b _L14_6s
3088: _L14_4s:
3089: cmpi.b %d1,&QNAN # is operand a QNAN?
3090: bne.b _L14_5s # no
3091: bsr.l src_qnan # yes
3092: bra.b _L14_6s
# No type matched above: fall through to the DENORM handler.
3093: _L14_5s:
3094: bsr.l slog10d # operand is a DENORM
3095: _L14_6s:
3096:
3097: #
3098: # Result is now in FP0
3099: #
3100: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3101: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3102: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3103: unlk %a6
3104: rts
3105:
# _flog10d_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slog10, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog10d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3106: global _flog10d_
3107: _flog10d_:
3108: link %a6,&-LOCAL_SIZE
3109:
3110: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3111: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3112: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3113:
3114: fmov.l &0x0,%fpcr # zero FPCR
3115:
3116: #
3117: # copy, convert, and tag input argument
3118: #
3119: fmov.d 0x8(%a6),%fp0 # load dbl input
3120: fmov.x %fp0,FP_SRC(%a6)
3121: lea FP_SRC(%a6),%a0
3122: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3123: mov.b %d0,STAG(%a6)
3124: mov.b %d0,%d1
3125:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3126: andi.l &0x00ff00ff,USER_FPSR(%a6)
3127:
3128: clr.l %d0
3129: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3130:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
3131: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
3132: tst.b %d1
3133: bne.b _L14_2d
3134: bsr.l slog10 # operand is a NORM
3135: bra.b _L14_6d
3136: _L14_2d:
3137: cmpi.b %d1,&ZERO # is operand a ZERO?
3138: bne.b _L14_3d # no
3139: bsr.l t_dz2 # yes
3140: bra.b _L14_6d
3141: _L14_3d:
3142: cmpi.b %d1,&INF # is operand an INF?
3143: bne.b _L14_4d # no
3144: bsr.l sopr_inf # yes
3145: bra.b _L14_6d
3146: _L14_4d:
3147: cmpi.b %d1,&QNAN # is operand a QNAN?
3148: bne.b _L14_5d # no
3149: bsr.l src_qnan # yes
3150: bra.b _L14_6d
# No type matched above: fall through to the DENORM handler.
3151: _L14_5d:
3152: bsr.l slog10d # operand is a DENORM
3153: _L14_6d:
3154:
3155: #
3156: # Result is now in FP0
3157: #
3158: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3159: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3160: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3161: unlk %a6
3162: rts
3163:
# _flog10x_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> slog10, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog10d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3164: global _flog10x_
3165: _flog10x_:
3166: link %a6,&-LOCAL_SIZE
3167:
3168: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3169: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3170: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3171:
3172: fmov.l &0x0,%fpcr # zero FPCR
3173:
3174: #
3175: # copy, convert, and tag input argument
3176: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
3177: lea FP_SRC(%a6),%a0
3178: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3179: mov.l 0x8+0x4(%a6),0x4(%a0)
3180: mov.l 0x8+0x8(%a6),0x8(%a0)
3181: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3182: mov.b %d0,STAG(%a6)
3183: mov.b %d0,%d1
3184:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3185: andi.l &0x00ff00ff,USER_FPSR(%a6)
3186:
3187: clr.l %d0
3188: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3189:
# Dispatch on the operand type held in d1.
3190: tst.b %d1
3191: bne.b _L14_2x
3192: bsr.l slog10 # operand is a NORM
3193: bra.b _L14_6x
3194: _L14_2x:
3195: cmpi.b %d1,&ZERO # is operand a ZERO?
3196: bne.b _L14_3x # no
3197: bsr.l t_dz2 # yes
3198: bra.b _L14_6x
3199: _L14_3x:
3200: cmpi.b %d1,&INF # is operand an INF?
3201: bne.b _L14_4x # no
3202: bsr.l sopr_inf # yes
3203: bra.b _L14_6x
3204: _L14_4x:
3205: cmpi.b %d1,&QNAN # is operand a QNAN?
3206: bne.b _L14_5x # no
3207: bsr.l src_qnan # yes
3208: bra.b _L14_6x
# No type matched above: fall through to the DENORM handler.
3209: _L14_5x:
3210: bsr.l slog10d # operand is a DENORM
3211: _L14_6x:
3212:
3213: #
3214: # Result is now in FP0
3215: #
3216: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3217: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3218: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3219: unlk %a6
3220: rts
3221:
3222:
3223: #########################################################################
3224: # MONADIC TEMPLATE #
3225: #########################################################################
# _flog2s_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slog2, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog2d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3226: global _flog2s_
3227: _flog2s_:
3228: link %a6,&-LOCAL_SIZE
3229:
3230: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3231: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3232: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3233:
3234: fmov.l &0x0,%fpcr # zero FPCR
3235:
3236: #
3237: # copy, convert, and tag input argument
3238: #
3239: fmov.s 0x8(%a6),%fp0 # load sgl input
3240: fmov.x %fp0,FP_SRC(%a6)
3241: lea FP_SRC(%a6),%a0
3242: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3243: mov.b %d0,STAG(%a6)
3244: mov.b %d0,%d1
3245:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3246: andi.l &0x00ff00ff,USER_FPSR(%a6)
3247:
3248: clr.l %d0
3249: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3250:
# Dispatch on the operand type held in d1.
3251: tst.b %d1
3252: bne.b _L15_2s
3253: bsr.l slog2 # operand is a NORM
3254: bra.b _L15_6s
3255: _L15_2s:
3256: cmpi.b %d1,&ZERO # is operand a ZERO?
3257: bne.b _L15_3s # no
3258: bsr.l t_dz2 # yes
3259: bra.b _L15_6s
3260: _L15_3s:
3261: cmpi.b %d1,&INF # is operand an INF?
3262: bne.b _L15_4s # no
3263: bsr.l sopr_inf # yes
3264: bra.b _L15_6s
3265: _L15_4s:
3266: cmpi.b %d1,&QNAN # is operand a QNAN?
3267: bne.b _L15_5s # no
3268: bsr.l src_qnan # yes
3269: bra.b _L15_6s
# No type matched above: fall through to the DENORM handler.
3270: _L15_5s:
3271: bsr.l slog2d # operand is a DENORM
3272: _L15_6s:
3273:
3274: #
3275: # Result is now in FP0
3276: #
3277: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3278: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3279: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3280: unlk %a6
3281: rts
3282:
# _flog2d_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> slog2, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog2d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3283: global _flog2d_
3284: _flog2d_:
3285: link %a6,&-LOCAL_SIZE
3286:
3287: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3288: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3289: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3290:
3291: fmov.l &0x0,%fpcr # zero FPCR
3292:
3293: #
3294: # copy, convert, and tag input argument
3295: #
3296: fmov.d 0x8(%a6),%fp0 # load dbl input
3297: fmov.x %fp0,FP_SRC(%a6)
3298: lea FP_SRC(%a6),%a0
3299: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3300: mov.b %d0,STAG(%a6)
3301: mov.b %d0,%d1
3302:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3303: andi.l &0x00ff00ff,USER_FPSR(%a6)
3304:
3305: clr.l %d0
3306: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3307:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
3308: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
3309: tst.b %d1
3310: bne.b _L15_2d
3311: bsr.l slog2 # operand is a NORM
3312: bra.b _L15_6d
3313: _L15_2d:
3314: cmpi.b %d1,&ZERO # is operand a ZERO?
3315: bne.b _L15_3d # no
3316: bsr.l t_dz2 # yes
3317: bra.b _L15_6d
3318: _L15_3d:
3319: cmpi.b %d1,&INF # is operand an INF?
3320: bne.b _L15_4d # no
3321: bsr.l sopr_inf # yes
3322: bra.b _L15_6d
3323: _L15_4d:
3324: cmpi.b %d1,&QNAN # is operand a QNAN?
3325: bne.b _L15_5d # no
3326: bsr.l src_qnan # yes
3327: bra.b _L15_6d
# No type matched above: fall through to the DENORM handler.
3328: _L15_5d:
3329: bsr.l slog2d # operand is a DENORM
3330: _L15_6d:
3331:
3332: #
3333: # Result is now in FP0
3334: #
3335: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3336: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3337: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3338: unlk %a6
3339: rts
3340:
# _flog2x_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> slog2, ZERO -> t_dz2,
# INF -> sopr_inf, QNAN -> src_qnan, any other type -> slog2d.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3341: global _flog2x_
3342: _flog2x_:
3343: link %a6,&-LOCAL_SIZE
3344:
3345: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3346: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3347: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3348:
3349: fmov.l &0x0,%fpcr # zero FPCR
3350:
3351: #
3352: # copy, convert, and tag input argument
3353: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
3354: lea FP_SRC(%a6),%a0
3355: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3356: mov.l 0x8+0x4(%a6),0x4(%a0)
3357: mov.l 0x8+0x8(%a6),0x8(%a0)
3358: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3359: mov.b %d0,STAG(%a6)
3360: mov.b %d0,%d1
3361:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3362: andi.l &0x00ff00ff,USER_FPSR(%a6)
3363:
3364: clr.l %d0
3365: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3366:
# Dispatch on the operand type held in d1.
3367: tst.b %d1
3368: bne.b _L15_2x
3369: bsr.l slog2 # operand is a NORM
3370: bra.b _L15_6x
3371: _L15_2x:
3372: cmpi.b %d1,&ZERO # is operand a ZERO?
3373: bne.b _L15_3x # no
3374: bsr.l t_dz2 # yes
3375: bra.b _L15_6x
3376: _L15_3x:
3377: cmpi.b %d1,&INF # is operand an INF?
3378: bne.b _L15_4x # no
3379: bsr.l sopr_inf # yes
3380: bra.b _L15_6x
3381: _L15_4x:
3382: cmpi.b %d1,&QNAN # is operand a QNAN?
3383: bne.b _L15_5x # no
3384: bsr.l src_qnan # yes
3385: bra.b _L15_6x
# No type matched above: fall through to the DENORM handler.
3386: _L15_5x:
3387: bsr.l slog2d # operand is a DENORM
3388: _L15_6x:
3389:
3390: #
3391: # Result is now in FP0
3392: #
3393: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3394: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3395: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3396: unlk %a6
3397: rts
3398:
3399:
3400: #########################################################################
3401: # MONADIC TEMPLATE #
3402: #########################################################################
# _fcoshs_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> scosh, ZERO -> ld_pone,
# INF -> ld_pinf, QNAN -> src_qnan, any other type -> scoshd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3403: global _fcoshs_
3404: _fcoshs_:
3405: link %a6,&-LOCAL_SIZE
3406:
3407: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3408: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3409: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3410:
3411: fmov.l &0x0,%fpcr # zero FPCR
3412:
3413: #
3414: # copy, convert, and tag input argument
3415: #
3416: fmov.s 0x8(%a6),%fp0 # load sgl input
3417: fmov.x %fp0,FP_SRC(%a6)
3418: lea FP_SRC(%a6),%a0
3419: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3420: mov.b %d0,STAG(%a6)
3421: mov.b %d0,%d1
3422:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3423: andi.l &0x00ff00ff,USER_FPSR(%a6)
3424:
3425: clr.l %d0
3426: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3427:
# Dispatch on the operand type held in d1.
3428: tst.b %d1
3429: bne.b _L16_2s
3430: bsr.l scosh # operand is a NORM
3431: bra.b _L16_6s
3432: _L16_2s:
3433: cmpi.b %d1,&ZERO # is operand a ZERO?
3434: bne.b _L16_3s # no
3435: bsr.l ld_pone # yes
3436: bra.b _L16_6s
3437: _L16_3s:
3438: cmpi.b %d1,&INF # is operand an INF?
3439: bne.b _L16_4s # no
3440: bsr.l ld_pinf # yes
3441: bra.b _L16_6s
3442: _L16_4s:
3443: cmpi.b %d1,&QNAN # is operand a QNAN?
3444: bne.b _L16_5s # no
3445: bsr.l src_qnan # yes
3446: bra.b _L16_6s
# No type matched above: fall through to the DENORM handler.
3447: _L16_5s:
3448: bsr.l scoshd # operand is a DENORM
3449: _L16_6s:
3450:
3451: #
3452: # Result is now in FP0
3453: #
3454: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3455: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3456: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3457: unlk %a6
3458: rts
3459:
# _fcoshd_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> scosh, ZERO -> ld_pone,
# INF -> ld_pinf, QNAN -> src_qnan, any other type -> scoshd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3460: global _fcoshd_
3461: _fcoshd_:
3462: link %a6,&-LOCAL_SIZE
3463:
3464: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3465: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3466: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3467:
3468: fmov.l &0x0,%fpcr # zero FPCR
3469:
3470: #
3471: # copy, convert, and tag input argument
3472: #
3473: fmov.d 0x8(%a6),%fp0 # load dbl input
3474: fmov.x %fp0,FP_SRC(%a6)
3475: lea FP_SRC(%a6),%a0
3476: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3477: mov.b %d0,STAG(%a6)
3478: mov.b %d0,%d1
3479:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3480: andi.l &0x00ff00ff,USER_FPSR(%a6)
3481:
3482: clr.l %d0
3483: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3484:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
3485: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
3486: tst.b %d1
3487: bne.b _L16_2d
3488: bsr.l scosh # operand is a NORM
3489: bra.b _L16_6d
3490: _L16_2d:
3491: cmpi.b %d1,&ZERO # is operand a ZERO?
3492: bne.b _L16_3d # no
3493: bsr.l ld_pone # yes
3494: bra.b _L16_6d
3495: _L16_3d:
3496: cmpi.b %d1,&INF # is operand an INF?
3497: bne.b _L16_4d # no
3498: bsr.l ld_pinf # yes
3499: bra.b _L16_6d
3500: _L16_4d:
3501: cmpi.b %d1,&QNAN # is operand a QNAN?
3502: bne.b _L16_5d # no
3503: bsr.l src_qnan # yes
3504: bra.b _L16_6d
# No type matched above: fall through to the DENORM handler.
3505: _L16_5d:
3506: bsr.l scoshd # operand is a DENORM
3507: _L16_6d:
3508:
3509: #
3510: # Result is now in FP0
3511: #
3512: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3513: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3514: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3515: unlk %a6
3516: rts
3517:
# _fcoshx_: entry point taking an extended-precision operand on the stack.
# The 12-byte operand at 0x8(%a6) is copied unchanged into FP_SRC,
# classified by tag, and dispatched: NORM -> scosh, ZERO -> ld_pone,
# INF -> ld_pinf, QNAN -> src_qnan, any other type -> scoshd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3518: global _fcoshx_
3519: _fcoshx_:
3520: link %a6,&-LOCAL_SIZE
3521:
3522: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3523: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3524: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3525:
3526: fmov.l &0x0,%fpcr # zero FPCR
3527:
3528: #
3529: # copy, convert, and tag input argument
3530: #
# The operand is moved as three longwords with integer moves, so no
# FPU conversion is applied to it.
3531: lea FP_SRC(%a6),%a0
3532: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3533: mov.l 0x8+0x4(%a6),0x4(%a0)
3534: mov.l 0x8+0x8(%a6),0x8(%a0)
3535: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3536: mov.b %d0,STAG(%a6)
3537: mov.b %d0,%d1
3538:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3539: andi.l &0x00ff00ff,USER_FPSR(%a6)
3540:
3541: clr.l %d0
3542: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3543:
# Dispatch on the operand type held in d1.
3544: tst.b %d1
3545: bne.b _L16_2x
3546: bsr.l scosh # operand is a NORM
3547: bra.b _L16_6x
3548: _L16_2x:
3549: cmpi.b %d1,&ZERO # is operand a ZERO?
3550: bne.b _L16_3x # no
3551: bsr.l ld_pone # yes
3552: bra.b _L16_6x
3553: _L16_3x:
3554: cmpi.b %d1,&INF # is operand an INF?
3555: bne.b _L16_4x # no
3556: bsr.l ld_pinf # yes
3557: bra.b _L16_6x
3558: _L16_4x:
3559: cmpi.b %d1,&QNAN # is operand a QNAN?
3560: bne.b _L16_5x # no
3561: bsr.l src_qnan # yes
3562: bra.b _L16_6x
# No type matched above: fall through to the DENORM handler.
3563: _L16_5x:
3564: bsr.l scoshd # operand is a DENORM
3565: _L16_6x:
3566:
3567: #
3568: # Result is now in FP0
3569: #
3570: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3571: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3572: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3573: unlk %a6
3574: rts
3575:
3576:
3577: #########################################################################
3578: # MONADIC TEMPLATE #
3579: #########################################################################
# _facoss_: entry point taking a single-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> sacos, ZERO -> ld_ppi2,
# INF -> t_operr, QNAN -> src_qnan, any other type -> sacosd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3580: global _facoss_
3581: _facoss_:
3582: link %a6,&-LOCAL_SIZE
3583:
3584: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3585: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3586: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3587:
3588: fmov.l &0x0,%fpcr # zero FPCR
3589:
3590: #
3591: # copy, convert, and tag input argument
3592: #
3593: fmov.s 0x8(%a6),%fp0 # load sgl input
3594: fmov.x %fp0,FP_SRC(%a6)
3595: lea FP_SRC(%a6),%a0
3596: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3597: mov.b %d0,STAG(%a6)
3598: mov.b %d0,%d1
3599:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3600: andi.l &0x00ff00ff,USER_FPSR(%a6)
3601:
3602: clr.l %d0
3603: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3604:
# Dispatch on the operand type held in d1.
3605: tst.b %d1
3606: bne.b _L17_2s
3607: bsr.l sacos # operand is a NORM
3608: bra.b _L17_6s
3609: _L17_2s:
3610: cmpi.b %d1,&ZERO # is operand a ZERO?
3611: bne.b _L17_3s # no
3612: bsr.l ld_ppi2 # yes
3613: bra.b _L17_6s
3614: _L17_3s:
3615: cmpi.b %d1,&INF # is operand an INF?
3616: bne.b _L17_4s # no
3617: bsr.l t_operr # yes
3618: bra.b _L17_6s
3619: _L17_4s:
3620: cmpi.b %d1,&QNAN # is operand a QNAN?
3621: bne.b _L17_5s # no
3622: bsr.l src_qnan # yes
3623: bra.b _L17_6s
# No type matched above: fall through to the DENORM handler.
3624: _L17_5s:
3625: bsr.l sacosd # operand is a DENORM
3626: _L17_6s:
3627:
3628: #
3629: # Result is now in FP0
3630: #
3631: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3632: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3633: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3634: unlk %a6
3635: rts
3636:
# _facosd_: entry point taking a double-precision operand on the stack.
# The operand at 0x8(%a6) is widened to extended precision in FP_SRC,
# classified by tag, and dispatched: NORM -> sacos, ZERO -> ld_ppi2,
# INF -> t_operr, QNAN -> src_qnan, any other type -> sacosd.
# The handler is entered with d0 = FPCR_MODE byte, zero-extended.
# On exit the result is in fp0; d0-d1/a0-a1, fp1, FPCR and FPSR are
# reloaded from the frame.
# NOTE(review): the handlers are defined outside this block; what they
# compute is not visible here.
3637: global _facosd_
3638: _facosd_:
3639: link %a6,&-LOCAL_SIZE
3640:
3641: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3642: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3643: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3644:
3645: fmov.l &0x0,%fpcr # zero FPCR
3646:
3647: #
3648: # copy, convert, and tag input argument
3649: #
3650: fmov.d 0x8(%a6),%fp0 # load dbl input
3651: fmov.x %fp0,FP_SRC(%a6)
3652: lea FP_SRC(%a6),%a0
3653: bsr.l tag # fetch operand type
# d0.b = operand type from tag; keep copies in STAG and d1 because d0
# is reused for the mode byte below.
3654: mov.b %d0,STAG(%a6)
3655: mov.b %d0,%d1
3656:
# Clear bits 31-24 and 15-8 of the saved FPSR image and keep the rest
# (in the 68k FPSR layout these are the condition-code and
# exception-status bytes).
3657: andi.l &0x00ff00ff,USER_FPSR(%a6)
3658:
3659: clr.l %d0
3660: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3661:
# NOTE(review): STAG was already written with this same value right
# after the tag call; this second store looks redundant (the single-
# and extended-precision variants omit it) - confirm before removing.
3662: mov.b %d1,STAG(%a6)
# Dispatch on the operand type held in d1.
3663: tst.b %d1
3664: bne.b _L17_2d
3665: bsr.l sacos # operand is a NORM
3666: bra.b _L17_6d
3667: _L17_2d:
3668: cmpi.b %d1,&ZERO # is operand a ZERO?
3669: bne.b _L17_3d # no
3670: bsr.l ld_ppi2 # yes
3671: bra.b _L17_6d
3672: _L17_3d:
3673: cmpi.b %d1,&INF # is operand an INF?
3674: bne.b _L17_4d # no
3675: bsr.l t_operr # yes
3676: bra.b _L17_6d
3677: _L17_4d:
3678: cmpi.b %d1,&QNAN # is operand a QNAN?
3679: bne.b _L17_5d # no
3680: bsr.l src_qnan # yes
3681: bra.b _L17_6d
# No type matched above: fall through to the DENORM handler.
3682: _L17_5d:
3683: bsr.l sacosd # operand is a DENORM
3684: _L17_6d:
3685:
3686: #
3687: # Result is now in FP0
3688: #
3689: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3690: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# Only fp1 is reloaded; fp0 keeps the result.
3691: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3692: unlk %a6
3693: rts
3694:
3695: global _facosx_ # export: ext-arg entry; tags the operand and dispatches to sacos/sacosd or a special-case handler
3696: _facosx_:
3697: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3698:
3699: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3700: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3701: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3702:
3703: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
3704:
3705: #
3706: # copy, convert, and tag input argument
3707: #
3708: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
3709: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (three longwords, copied verbatim)
3710: mov.l 0x8+0x4(%a6),0x4(%a0)
3711: mov.l 0x8+0x8(%a6),0x8(%a0)
3712: bsr.l tag # fetch operand type (comes back in d0)
3713: mov.b %d0,STAG(%a6) # remember the tag in the frame
3714: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
3715:
3716: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
3717:
3718: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
3719: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3720:
3721: tst.b %d1 # tag zero?
3722: bne.b _L17_2x # no, check the remaining tags
3723: bsr.l sacos # operand is a NORM
3724: bra.b _L17_6x
3725: _L17_2x:
3726: cmpi.b %d1,&ZERO # is operand a ZERO?
3727: bne.b _L17_3x # no
3728: bsr.l ld_ppi2 # yes
3729: bra.b _L17_6x
3730: _L17_3x:
3731: cmpi.b %d1,&INF # is operand an INF?
3732: bne.b _L17_4x # no
3733: bsr.l t_operr # yes
3734: bra.b _L17_6x
3735: _L17_4x:
3736: cmpi.b %d1,&QNAN # is operand a QNAN?
3737: bne.b _L17_5x # no
3738: bsr.l src_qnan # yes
3739: bra.b _L17_6x
3740: _L17_5x: # any other tag value lands here
3741: bsr.l sacosd # operand is a DENORM
3742: _L17_6x: # common exit
3743:
3744: #
3745: # Result is now in FP0
3746: #
3747: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3748: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3749: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
3750: unlk %a6 # release the frame
3751: rts
3752:
3753:
3754: #########################################################################
3755: # MONADIC TEMPLATE #
3756: #########################################################################
3757: global _fgetexps_ # export: sgl-arg entry; tags the operand and dispatches to sgetexp/sgetexpd or a special-case handler
3758: _fgetexps_:
3759: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3760:
3761: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3762: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3763: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3764:
3765: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
3766:
3767: #
3768: # copy, convert, and tag input argument
3769: #
3770: fmov.s 0x8(%a6),%fp0 # load sgl input
3771: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
3772: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
3773: bsr.l tag # fetch operand type (comes back in d0)
3774: mov.b %d0,STAG(%a6) # remember the tag in the frame
3775: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
3776:
3777: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
3778:
3779: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
3780: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3781:
3782: tst.b %d1 # tag zero?
3783: bne.b _L18_2s # no, check the remaining tags
3784: bsr.l sgetexp # operand is a NORM
3785: bra.b _L18_6s
3786: _L18_2s:
3787: cmpi.b %d1,&ZERO # is operand a ZERO?
3788: bne.b _L18_3s # no
3789: bsr.l src_zero # yes
3790: bra.b _L18_6s
3791: _L18_3s:
3792: cmpi.b %d1,&INF # is operand an INF?
3793: bne.b _L18_4s # no
3794: bsr.l t_operr # yes
3795: bra.b _L18_6s
3796: _L18_4s:
3797: cmpi.b %d1,&QNAN # is operand a QNAN?
3798: bne.b _L18_5s # no
3799: bsr.l src_qnan # yes
3800: bra.b _L18_6s
3801: _L18_5s: # any other tag value lands here
3802: bsr.l sgetexpd # operand is a DENORM
3803: _L18_6s: # common exit
3804:
3805: #
3806: # Result is now in FP0
3807: #
3808: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3809: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3810: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
3811: unlk %a6 # release the frame
3812: rts
3813:
3814: global _fgetexpd_ # export: dbl-arg entry; tags the operand and dispatches to sgetexp/sgetexpd or a special-case handler
3815: _fgetexpd_:
3816: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3817:
3818: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3819: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3820: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3821:
3822: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
3823:
3824: #
3825: # copy, convert, and tag input argument
3826: #
3827: fmov.d 0x8(%a6),%fp0 # load dbl input
3828: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
3829: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
3830: bsr.l tag # fetch operand type (comes back in d0)
3831: mov.b %d0,STAG(%a6) # remember the tag in the frame
3832: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
3833:
3834: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
3835:
3836: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
3837: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3838:
3839: mov.b %d1,STAG(%a6) # stores the same tag byte to STAG a second time
3840: tst.b %d1 # tag zero?
3841: bne.b _L18_2d # no, check the remaining tags
3842: bsr.l sgetexp # operand is a NORM
3843: bra.b _L18_6d
3844: _L18_2d:
3845: cmpi.b %d1,&ZERO # is operand a ZERO?
3846: bne.b _L18_3d # no
3847: bsr.l src_zero # yes
3848: bra.b _L18_6d
3849: _L18_3d:
3850: cmpi.b %d1,&INF # is operand an INF?
3851: bne.b _L18_4d # no
3852: bsr.l t_operr # yes
3853: bra.b _L18_6d
3854: _L18_4d:
3855: cmpi.b %d1,&QNAN # is operand a QNAN?
3856: bne.b _L18_5d # no
3857: bsr.l src_qnan # yes
3858: bra.b _L18_6d
3859: _L18_5d: # any other tag value lands here
3860: bsr.l sgetexpd # operand is a DENORM
3861: _L18_6d: # common exit
3862:
3863: #
3864: # Result is now in FP0
3865: #
3866: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3867: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3868: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
3869: unlk %a6 # release the frame
3870: rts
3871:
3872: global _fgetexpx_ # export: ext-arg entry; tags the operand and dispatches to sgetexp/sgetexpd or a special-case handler
3873: _fgetexpx_:
3874: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3875:
3876: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3877: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3878: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3879:
3880: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
3881:
3882: #
3883: # copy, convert, and tag input argument
3884: #
3885: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
3886: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (three longwords, copied verbatim)
3887: mov.l 0x8+0x4(%a6),0x4(%a0)
3888: mov.l 0x8+0x8(%a6),0x8(%a0)
3889: bsr.l tag # fetch operand type (comes back in d0)
3890: mov.b %d0,STAG(%a6) # remember the tag in the frame
3891: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
3892:
3893: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
3894:
3895: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
3896: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3897:
3898: tst.b %d1 # tag zero?
3899: bne.b _L18_2x # no, check the remaining tags
3900: bsr.l sgetexp # operand is a NORM
3901: bra.b _L18_6x
3902: _L18_2x:
3903: cmpi.b %d1,&ZERO # is operand a ZERO?
3904: bne.b _L18_3x # no
3905: bsr.l src_zero # yes
3906: bra.b _L18_6x
3907: _L18_3x:
3908: cmpi.b %d1,&INF # is operand an INF?
3909: bne.b _L18_4x # no
3910: bsr.l t_operr # yes
3911: bra.b _L18_6x
3912: _L18_4x:
3913: cmpi.b %d1,&QNAN # is operand a QNAN?
3914: bne.b _L18_5x # no
3915: bsr.l src_qnan # yes
3916: bra.b _L18_6x
3917: _L18_5x: # any other tag value lands here
3918: bsr.l sgetexpd # operand is a DENORM
3919: _L18_6x: # common exit
3920:
3921: #
3922: # Result is now in FP0
3923: #
3924: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3925: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3926: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
3927: unlk %a6 # release the frame
3928: rts
3929:
3930:
3931: #########################################################################
3932: # MONADIC TEMPLATE #
3933: #########################################################################
3934: global _fgetmans_ # export: sgl-arg entry; tags the operand and dispatches to sgetman/sgetmand or a special-case handler
3935: _fgetmans_:
3936: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3937:
3938: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3939: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3940: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3941:
3942: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
3943:
3944: #
3945: # copy, convert, and tag input argument
3946: #
3947: fmov.s 0x8(%a6),%fp0 # load sgl input
3948: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
3949: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
3950: bsr.l tag # fetch operand type (comes back in d0)
3951: mov.b %d0,STAG(%a6) # remember the tag in the frame
3952: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
3953:
3954: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
3955:
3956: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
3957: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3958:
3959: tst.b %d1 # tag zero?
3960: bne.b _L19_2s # no, check the remaining tags
3961: bsr.l sgetman # operand is a NORM
3962: bra.b _L19_6s
3963: _L19_2s:
3964: cmpi.b %d1,&ZERO # is operand a ZERO?
3965: bne.b _L19_3s # no
3966: bsr.l src_zero # yes
3967: bra.b _L19_6s
3968: _L19_3s:
3969: cmpi.b %d1,&INF # is operand an INF?
3970: bne.b _L19_4s # no
3971: bsr.l t_operr # yes
3972: bra.b _L19_6s
3973: _L19_4s:
3974: cmpi.b %d1,&QNAN # is operand a QNAN?
3975: bne.b _L19_5s # no
3976: bsr.l src_qnan # yes
3977: bra.b _L19_6s
3978: _L19_5s: # any other tag value lands here
3979: bsr.l sgetmand # operand is a DENORM
3980: _L19_6s: # common exit
3981:
3982: #
3983: # Result is now in FP0
3984: #
3985: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3986: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3987: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
3988: unlk %a6 # release the frame
3989: rts
3990:
3991: global _fgetmand_ # export: dbl-arg entry; tags the operand and dispatches to sgetman/sgetmand or a special-case handler
3992: _fgetmand_:
3993: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
3994:
3995: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3996: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3997: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3998:
3999: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4000:
4001: #
4002: # copy, convert, and tag input argument
4003: #
4004: fmov.d 0x8(%a6),%fp0 # load dbl input
4005: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4006: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4007: bsr.l tag # fetch operand type (comes back in d0)
4008: mov.b %d0,STAG(%a6) # remember the tag in the frame
4009: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
4010:
4011: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4012:
4013: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4014: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4015:
4016: mov.b %d1,STAG(%a6) # stores the same tag byte to STAG a second time
4017: tst.b %d1 # tag zero?
4018: bne.b _L19_2d # no, check the remaining tags
4019: bsr.l sgetman # operand is a NORM
4020: bra.b _L19_6d
4021: _L19_2d:
4022: cmpi.b %d1,&ZERO # is operand a ZERO?
4023: bne.b _L19_3d # no
4024: bsr.l src_zero # yes
4025: bra.b _L19_6d
4026: _L19_3d:
4027: cmpi.b %d1,&INF # is operand an INF?
4028: bne.b _L19_4d # no
4029: bsr.l t_operr # yes
4030: bra.b _L19_6d
4031: _L19_4d:
4032: cmpi.b %d1,&QNAN # is operand a QNAN?
4033: bne.b _L19_5d # no
4034: bsr.l src_qnan # yes
4035: bra.b _L19_6d
4036: _L19_5d: # any other tag value lands here
4037: bsr.l sgetmand # operand is a DENORM
4038: _L19_6d: # common exit
4039:
4040: #
4041: # Result is now in FP0
4042: #
4043: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4044: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4045: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4046: unlk %a6 # release the frame
4047: rts
4048:
4049: global _fgetmanx_ # export: ext-arg entry; tags the operand and dispatches to sgetman/sgetmand or a special-case handler
4050: _fgetmanx_:
4051: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4052:
4053: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4054: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4055: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4056:
4057: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4058:
4059: #
4060: # copy, convert, and tag input argument
4061: #
4062: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
4063: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (three longwords, copied verbatim)
4064: mov.l 0x8+0x4(%a6),0x4(%a0)
4065: mov.l 0x8+0x8(%a6),0x8(%a0)
4066: bsr.l tag # fetch operand type (comes back in d0)
4067: mov.b %d0,STAG(%a6) # remember the tag in the frame
4068: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
4069:
4070: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4071:
4072: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4073: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4074:
4075: tst.b %d1 # tag zero?
4076: bne.b _L19_2x # no, check the remaining tags
4077: bsr.l sgetman # operand is a NORM
4078: bra.b _L19_6x
4079: _L19_2x:
4080: cmpi.b %d1,&ZERO # is operand a ZERO?
4081: bne.b _L19_3x # no
4082: bsr.l src_zero # yes
4083: bra.b _L19_6x
4084: _L19_3x:
4085: cmpi.b %d1,&INF # is operand an INF?
4086: bne.b _L19_4x # no
4087: bsr.l t_operr # yes
4088: bra.b _L19_6x
4089: _L19_4x:
4090: cmpi.b %d1,&QNAN # is operand a QNAN?
4091: bne.b _L19_5x # no
4092: bsr.l src_qnan # yes
4093: bra.b _L19_6x
4094: _L19_5x: # any other tag value lands here
4095: bsr.l sgetmand # operand is a DENORM
4096: _L19_6x: # common exit
4097:
4098: #
4099: # Result is now in FP0
4100: #
4101: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4102: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4103: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4104: unlk %a6 # release the frame
4105: rts
4106:
4107:
4108: #########################################################################
4109: # MONADIC TEMPLATE #
4110: #########################################################################
4111: global _fsincoss_ # export: sgl-arg entry; tags the operand, dispatches to the ssincos* handlers, exchanges fp0/fp1 on exit
4112: _fsincoss_:
4113: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4114:
4115: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4116: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4117: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4118:
4119: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4120:
4121: #
4122: # copy, convert, and tag input argument
4123: #
4124: fmov.s 0x8(%a6),%fp0 # load sgl input
4125: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4126: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4127: bsr.l tag # fetch operand type (comes back in d0)
4128: mov.b %d0,STAG(%a6) # remember the tag in the frame
4129: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
4130:
4131: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4132:
4133: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4134: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4135:
4136: tst.b %d1 # tag zero?
4137: bne.b _L20_2s # no, check the remaining tags
4138: bsr.l ssincos # operand is a NORM
4139: bra.b _L20_6s
4140: _L20_2s:
4141: cmpi.b %d1,&ZERO # is operand a ZERO?
4142: bne.b _L20_3s # no
4143: bsr.l ssincosz # yes
4144: bra.b _L20_6s
4145: _L20_3s:
4146: cmpi.b %d1,&INF # is operand an INF?
4147: bne.b _L20_4s # no
4148: bsr.l ssincosi # yes
4149: bra.b _L20_6s
4150: _L20_4s:
4151: cmpi.b %d1,&QNAN # is operand a QNAN?
4152: bne.b _L20_5s # no
4153: bsr.l ssincosqnan # yes
4154: bra.b _L20_6s
4155: _L20_5s: # any other tag value lands here
4156: bsr.l ssincosd # operand is a DENORM
4157: _L20_6s: # common exit
4158:
4159: #
4160: # fp0 and fp1 are exchanged below; fp1 is not restored from EXC_FP1 here
4161: #
4162: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4163: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4164: fmovm.x &0x03,-(%sp) # store off fp0/fp1
4165: fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4166: fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4167: unlk %a6 # release the frame
4168: rts
4169:
4170: global _fsincosd_ # export: dbl-arg entry; tags the operand, dispatches to the ssincos* handlers, exchanges fp0/fp1 on exit
4171: _fsincosd_:
4172: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4173:
4174: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4175: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4176: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4177:
4178: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4179:
4180: #
4181: # copy, convert, and tag input argument
4182: #
4183: fmov.d 0x8(%a6),%fp0 # load dbl input
4184: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4185: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4186: bsr.l tag # fetch operand type (comes back in d0)
4187: mov.b %d0,STAG(%a6) # remember the tag in the frame
4188: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
4189:
4190: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4191:
4192: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4193: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4194:
4195: mov.b %d1,STAG(%a6) # stores the same tag byte to STAG a second time
4196: tst.b %d1 # tag zero?
4197: bne.b _L20_2d # no, check the remaining tags
4198: bsr.l ssincos # operand is a NORM
4199: bra.b _L20_6d
4200: _L20_2d:
4201: cmpi.b %d1,&ZERO # is operand a ZERO?
4202: bne.b _L20_3d # no
4203: bsr.l ssincosz # yes
4204: bra.b _L20_6d
4205: _L20_3d:
4206: cmpi.b %d1,&INF # is operand an INF?
4207: bne.b _L20_4d # no
4208: bsr.l ssincosi # yes
4209: bra.b _L20_6d
4210: _L20_4d:
4211: cmpi.b %d1,&QNAN # is operand a QNAN?
4212: bne.b _L20_5d # no
4213: bsr.l ssincosqnan # yes
4214: bra.b _L20_6d
4215: _L20_5d: # any other tag value lands here
4216: bsr.l ssincosd # operand is a DENORM
4217: _L20_6d: # common exit
4218:
4219: #
4220: # fp0 and fp1 are exchanged below; fp1 is not restored from EXC_FP1 here
4221: #
4222: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4223: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4224: fmovm.x &0x03,-(%sp) # store off fp0/fp1
4225: fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4226: fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4227: unlk %a6 # release the frame
4228: rts
4229:
4230: global _fsincosx_ # export: ext-arg entry; tags the operand, dispatches to the ssincos* handlers, exchanges fp0/fp1 on exit
4231: _fsincosx_:
4232: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4233:
4234: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4235: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4236: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4237:
4238: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4239:
4240: #
4241: # copy, convert, and tag input argument
4242: #
4243: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
4244: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (three longwords, copied verbatim)
4245: mov.l 0x8+0x4(%a6),0x4(%a0)
4246: mov.l 0x8+0x8(%a6),0x8(%a0)
4247: bsr.l tag # fetch operand type (comes back in d0)
4248: mov.b %d0,STAG(%a6) # remember the tag in the frame
4249: mov.b %d0,%d1 # d1 = tag, since d0 is reloaded below
4250:
4251: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4252:
4253: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4254: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4255:
4256: tst.b %d1 # tag zero?
4257: bne.b _L20_2x # no, check the remaining tags
4258: bsr.l ssincos # operand is a NORM
4259: bra.b _L20_6x
4260: _L20_2x:
4261: cmpi.b %d1,&ZERO # is operand a ZERO?
4262: bne.b _L20_3x # no
4263: bsr.l ssincosz # yes
4264: bra.b _L20_6x
4265: _L20_3x:
4266: cmpi.b %d1,&INF # is operand an INF?
4267: bne.b _L20_4x # no
4268: bsr.l ssincosi # yes
4269: bra.b _L20_6x
4270: _L20_4x:
4271: cmpi.b %d1,&QNAN # is operand a QNAN?
4272: bne.b _L20_5x # no
4273: bsr.l ssincosqnan # yes
4274: bra.b _L20_6x
4275: _L20_5x: # any other tag value lands here
4276: bsr.l ssincosd # operand is a DENORM
4277: _L20_6x: # common exit
4278:
4279: #
4280: # fp0 and fp1 are exchanged below; fp1 is not restored from EXC_FP1 here
4281: #
4282: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4283: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4284: fmovm.x &0x03,-(%sp) # store off fp0/fp1
4285: fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4286: fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4287: unlk %a6 # release the frame
4288: rts
4289:
4290:
4291: #########################################################################
4292: # DYADIC TEMPLATE #
4293: #########################################################################
4294: global _frems_ # export: sgl-arg entry; tags dst and src operands and dispatches on the src tag to the srem_s* handlers
4295: _frems_:
4296: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4297:
4298: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4299: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4300: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4301:
4302: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4303:
4304: #
4305: # copy, convert, and tag input argument
4306: #
4307: fmov.s 0x8(%a6),%fp0 # load sgl dst
4308: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4309: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4310: bsr.l tag # fetch operand type (comes back in d0)
4311: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4312:
4313: fmov.s 0xc(%a6),%fp0 # load sgl src
4314: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4315: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4316: bsr.l tag # fetch operand type (comes back in d0)
4317: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4318: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4319:
4320: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4321:
4322: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4323: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4324:
4325: lea FP_SRC(%a6),%a0 # pass ptr to src
4326: lea FP_DST(%a6),%a1 # pass ptr to dst
4327:
4328: tst.b %d1 # src tag zero?
4329: bne.b _L21_2s # no, check the remaining tags
4330: bsr.l srem_snorm # operand is a NORM
4331: bra.b _L21_6s
4332: _L21_2s:
4333: cmpi.b %d1,&ZERO # is operand a ZERO?
4334: bne.b _L21_3s # no
4335: bsr.l srem_szero # yes
4336: bra.b _L21_6s
4337: _L21_3s:
4338: cmpi.b %d1,&INF # is operand an INF?
4339: bne.b _L21_4s # no
4340: bsr.l srem_sinf # yes
4341: bra.b _L21_6s
4342: _L21_4s:
4343: cmpi.b %d1,&QNAN # is operand a QNAN?
4344: bne.b _L21_5s # no
4345: bsr.l sop_sqnan # yes
4346: bra.b _L21_6s
4347: _L21_5s: # any other src tag value lands here
4348: bsr.l srem_sdnrm # operand is a DENORM
4349: _L21_6s: # common exit
4350:
4351: #
4352: # Result is now in FP0
4353: #
4354: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4355: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4356: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4357: unlk %a6 # release the frame
4358: rts
4359:
4360: global _fremd_ # export: dbl-arg entry; tags dst and src operands and dispatches on the src tag to the srem_s* handlers
4361: _fremd_:
4362: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4363:
4364: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4365: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4366: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4367:
4368: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4369:
4370: #
4371: # copy, convert, and tag input argument
4372: #
4373: fmov.d 0x8(%a6),%fp0 # load dbl dst
4374: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4375: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4376: bsr.l tag # fetch operand type (comes back in d0)
4377: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4378:
4379: fmov.d 0x10(%a6),%fp0 # load dbl src
4380: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4381: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4382: bsr.l tag # fetch operand type (comes back in d0)
4383: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4384: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4385:
4386: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4387:
4388: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4389: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4390:
4391: lea FP_SRC(%a6),%a0 # pass ptr to src
4392: lea FP_DST(%a6),%a1 # pass ptr to dst
4393:
4394: tst.b %d1 # src tag zero?
4395: bne.b _L21_2d # no, check the remaining tags
4396: bsr.l srem_snorm # operand is a NORM
4397: bra.b _L21_6d
4398: _L21_2d:
4399: cmpi.b %d1,&ZERO # is operand a ZERO?
4400: bne.b _L21_3d # no
4401: bsr.l srem_szero # yes
4402: bra.b _L21_6d
4403: _L21_3d:
4404: cmpi.b %d1,&INF # is operand an INF?
4405: bne.b _L21_4d # no
4406: bsr.l srem_sinf # yes
4407: bra.b _L21_6d
4408: _L21_4d:
4409: cmpi.b %d1,&QNAN # is operand a QNAN?
4410: bne.b _L21_5d # no
4411: bsr.l sop_sqnan # yes
4412: bra.b _L21_6d
4413: _L21_5d: # any other src tag value lands here
4414: bsr.l srem_sdnrm # operand is a DENORM
4415: _L21_6d: # common exit
4416:
4417: #
4418: # Result is now in FP0
4419: #
4420: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4421: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4422: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4423: unlk %a6 # release the frame
4424: rts
4425:
4426: global _fremx_ # export: ext-arg entry; tags dst and src operands and dispatches on the src tag to the srem_s* handlers
4427: _fremx_:
4428: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4429:
4430: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4431: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4432: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4433:
4434: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4435:
4436: #
4437: # copy, convert, and tag input argument
4438: #
4439: lea FP_DST(%a6),%a0 # a0 = &FP_DST: copy destination, still set for the tag call
4440: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst (three longwords, copied verbatim)
4441: mov.l 0x8+0x4(%a6),0x4(%a0)
4442: mov.l 0x8+0x8(%a6),0x8(%a0)
4443: bsr.l tag # fetch operand type (comes back in d0)
4444: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4445:
4446: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
4447: mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src (three longwords, copied verbatim)
4448: mov.l 0x14+0x4(%a6),0x4(%a0)
4449: mov.l 0x14+0x8(%a6),0x8(%a0)
4450: bsr.l tag # fetch operand type (comes back in d0)
4451: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4452: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4453:
4454: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4455:
4456: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4457: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4458:
4459: lea FP_SRC(%a6),%a0 # pass ptr to src
4460: lea FP_DST(%a6),%a1 # pass ptr to dst
4461:
4462: tst.b %d1 # src tag zero?
4463: bne.b _L21_2x # no, check the remaining tags
4464: bsr.l srem_snorm # operand is a NORM
4465: bra.b _L21_6x
4466: _L21_2x:
4467: cmpi.b %d1,&ZERO # is operand a ZERO?
4468: bne.b _L21_3x # no
4469: bsr.l srem_szero # yes
4470: bra.b _L21_6x
4471: _L21_3x:
4472: cmpi.b %d1,&INF # is operand an INF?
4473: bne.b _L21_4x # no
4474: bsr.l srem_sinf # yes
4475: bra.b _L21_6x
4476: _L21_4x:
4477: cmpi.b %d1,&QNAN # is operand a QNAN?
4478: bne.b _L21_5x # no
4479: bsr.l sop_sqnan # yes
4480: bra.b _L21_6x
4481: _L21_5x: # any other src tag value lands here
4482: bsr.l srem_sdnrm # operand is a DENORM
4483: _L21_6x: # common exit
4484:
4485: #
4486: # Result is now in FP0
4487: #
4488: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4489: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4490: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4491: unlk %a6 # release the frame
4492: rts
4493:
4494:
4495: #########################################################################
4496: # DYADIC TEMPLATE #
4497: #########################################################################
4498: global _fmods_ # export: sgl-arg entry; tags dst and src operands and dispatches on the src tag to the smod_s* handlers
4499: _fmods_:
4500: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4501:
4502: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4503: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4504: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4505:
4506: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4507:
4508: #
4509: # copy, convert, and tag input argument
4510: #
4511: fmov.s 0x8(%a6),%fp0 # load sgl dst
4512: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4513: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4514: bsr.l tag # fetch operand type (comes back in d0)
4515: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4516:
4517: fmov.s 0xc(%a6),%fp0 # load sgl src
4518: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4519: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4520: bsr.l tag # fetch operand type (comes back in d0)
4521: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4522: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4523:
4524: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4525:
4526: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4527: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4528:
4529: lea FP_SRC(%a6),%a0 # pass ptr to src
4530: lea FP_DST(%a6),%a1 # pass ptr to dst
4531:
4532: tst.b %d1 # src tag zero?
4533: bne.b _L22_2s # no, check the remaining tags
4534: bsr.l smod_snorm # operand is a NORM
4535: bra.b _L22_6s
4536: _L22_2s:
4537: cmpi.b %d1,&ZERO # is operand a ZERO?
4538: bne.b _L22_3s # no
4539: bsr.l smod_szero # yes
4540: bra.b _L22_6s
4541: _L22_3s:
4542: cmpi.b %d1,&INF # is operand an INF?
4543: bne.b _L22_4s # no
4544: bsr.l smod_sinf # yes
4545: bra.b _L22_6s
4546: _L22_4s:
4547: cmpi.b %d1,&QNAN # is operand a QNAN?
4548: bne.b _L22_5s # no
4549: bsr.l sop_sqnan # yes
4550: bra.b _L22_6s
4551: _L22_5s: # any other src tag value lands here
4552: bsr.l smod_sdnrm # operand is a DENORM
4553: _L22_6s: # common exit
4554:
4555: #
4556: # Result is now in FP0
4557: #
4558: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4559: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4560: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4561: unlk %a6 # release the frame
4562: rts
4563:
4564: global _fmodd_ # export: dbl-arg entry; tags dst and src operands and dispatches on the src tag to the smod_s* handlers
4565: _fmodd_:
4566: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4567:
4568: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4569: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4570: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4571:
4572: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4573:
4574: #
4575: # copy, convert, and tag input argument
4576: #
4577: fmov.d 0x8(%a6),%fp0 # load dbl dst
4578: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4579: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4580: bsr.l tag # fetch operand type (comes back in d0)
4581: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4582:
4583: fmov.d 0x10(%a6),%fp0 # load dbl src
4584: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4585: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4586: bsr.l tag # fetch operand type (comes back in d0)
4587: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4588: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4589:
4590: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4591:
4592: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4593: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4594:
4595: lea FP_SRC(%a6),%a0 # pass ptr to src
4596: lea FP_DST(%a6),%a1 # pass ptr to dst
4597:
4598: tst.b %d1 # src tag zero?
4599: bne.b _L22_2d # no, check the remaining tags
4600: bsr.l smod_snorm # operand is a NORM
4601: bra.b _L22_6d
4602: _L22_2d:
4603: cmpi.b %d1,&ZERO # is operand a ZERO?
4604: bne.b _L22_3d # no
4605: bsr.l smod_szero # yes
4606: bra.b _L22_6d
4607: _L22_3d:
4608: cmpi.b %d1,&INF # is operand an INF?
4609: bne.b _L22_4d # no
4610: bsr.l smod_sinf # yes
4611: bra.b _L22_6d
4612: _L22_4d:
4613: cmpi.b %d1,&QNAN # is operand a QNAN?
4614: bne.b _L22_5d # no
4615: bsr.l sop_sqnan # yes
4616: bra.b _L22_6d
4617: _L22_5d: # any other src tag value lands here
4618: bsr.l smod_sdnrm # operand is a DENORM
4619: _L22_6d: # common exit
4620:
4621: #
4622: # Result is now in FP0
4623: #
4624: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4625: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4626: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4627: unlk %a6 # release the frame
4628: rts
4629:
4630: global _fmodx_ # export: ext-arg entry; tags dst and src operands and dispatches on the src tag to the smod_s* handlers
4631: _fmodx_:
4632: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4633:
4634: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4635: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4636: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4637:
4638: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4639:
4640: #
4641: # copy, convert, and tag input argument
4642: #
4643: lea FP_DST(%a6),%a0 # a0 = &FP_DST: copy destination, still set for the tag call
4644: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst (three longwords, copied verbatim)
4645: mov.l 0x8+0x4(%a6),0x4(%a0)
4646: mov.l 0x8+0x8(%a6),0x8(%a0)
4647: bsr.l tag # fetch operand type (comes back in d0)
4648: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4649:
4650: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
4651: mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src (three longwords, copied verbatim)
4652: mov.l 0x14+0x4(%a6),0x4(%a0)
4653: mov.l 0x14+0x8(%a6),0x8(%a0)
4654: bsr.l tag # fetch operand type (comes back in d0)
4655: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4656: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4657:
4658: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4659:
4660: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4661: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4662:
4663: lea FP_SRC(%a6),%a0 # pass ptr to src
4664: lea FP_DST(%a6),%a1 # pass ptr to dst
4665:
4666: tst.b %d1 # src tag zero?
4667: bne.b _L22_2x # no, check the remaining tags
4668: bsr.l smod_snorm # operand is a NORM
4669: bra.b _L22_6x
4670: _L22_2x:
4671: cmpi.b %d1,&ZERO # is operand a ZERO?
4672: bne.b _L22_3x # no
4673: bsr.l smod_szero # yes
4674: bra.b _L22_6x
4675: _L22_3x:
4676: cmpi.b %d1,&INF # is operand an INF?
4677: bne.b _L22_4x # no
4678: bsr.l smod_sinf # yes
4679: bra.b _L22_6x
4680: _L22_4x:
4681: cmpi.b %d1,&QNAN # is operand a QNAN?
4682: bne.b _L22_5x # no
4683: bsr.l sop_sqnan # yes
4684: bra.b _L22_6x
4685: _L22_5x: # any other src tag value lands here
4686: bsr.l smod_sdnrm # operand is a DENORM
4687: _L22_6x: # common exit
4688:
4689: #
4690: # Result is now in FP0
4691: #
4692: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4693: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4694: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4695: unlk %a6 # release the frame
4696: rts
4697:
4698:
4699: #########################################################################
4700: # DYADIC TEMPLATE #
4701: #########################################################################
4702: global _fscales_ # export: sgl-arg entry; tags dst and src operands and dispatches on the src tag to the sscale_s* handlers
4703: _fscales_:
4704: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4705:
4706: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4707: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4708: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4709:
4710: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4711:
4712: #
4713: # copy, convert, and tag input argument
4714: #
4715: fmov.s 0x8(%a6),%fp0 # load sgl dst
4716: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4717: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4718: bsr.l tag # fetch operand type (comes back in d0)
4719: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4720:
4721: fmov.s 0xc(%a6),%fp0 # load sgl src
4722: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4723: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4724: bsr.l tag # fetch operand type (comes back in d0)
4725: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4726: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4727:
4728: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4729:
4730: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4731: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4732:
4733: lea FP_SRC(%a6),%a0 # pass ptr to src
4734: lea FP_DST(%a6),%a1 # pass ptr to dst
4735:
4736: tst.b %d1 # src tag zero?
4737: bne.b _L23_2s # no, check the remaining tags
4738: bsr.l sscale_snorm # operand is a NORM
4739: bra.b _L23_6s
4740: _L23_2s:
4741: cmpi.b %d1,&ZERO # is operand a ZERO?
4742: bne.b _L23_3s # no
4743: bsr.l sscale_szero # yes
4744: bra.b _L23_6s
4745: _L23_3s:
4746: cmpi.b %d1,&INF # is operand an INF?
4747: bne.b _L23_4s # no
4748: bsr.l sscale_sinf # yes
4749: bra.b _L23_6s
4750: _L23_4s:
4751: cmpi.b %d1,&QNAN # is operand a QNAN?
4752: bne.b _L23_5s # no
4753: bsr.l sop_sqnan # yes
4754: bra.b _L23_6s
4755: _L23_5s: # any other src tag value lands here
4756: bsr.l sscale_sdnrm # operand is a DENORM
4757: _L23_6s: # common exit
4758:
4759: #
4760: # Result is now in FP0
4761: #
4762: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4763: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4764: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4765: unlk %a6 # release the frame
4766: rts
4767:
4768: global _fscaled_ # export: dbl-arg entry; tags dst and src operands and dispatches on the src tag to the sscale_s* handlers
4769: _fscaled_:
4770: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4771:
4772: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4773: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4774: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4775:
4776: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4777:
4778: #
4779: # copy, convert, and tag input argument
4780: #
4781: fmov.d 0x8(%a6),%fp0 # load dbl dst
4782: fmov.x %fp0,FP_DST(%a6) # rewrite it in extended format at FP_DST
4783: lea FP_DST(%a6),%a0 # a0 = &FP_DST ahead of the tag call
4784: bsr.l tag # fetch operand type (comes back in d0)
4785: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4786:
4787: fmov.d 0x10(%a6),%fp0 # load dbl src
4788: fmov.x %fp0,FP_SRC(%a6) # rewrite it in extended format at FP_SRC
4789: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC ahead of the tag call
4790: bsr.l tag # fetch operand type (comes back in d0)
4791: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4792: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4793:
4794: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4795:
4796: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4797: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4798:
4799: lea FP_SRC(%a6),%a0 # pass ptr to src
4800: lea FP_DST(%a6),%a1 # pass ptr to dst
4801:
4802: tst.b %d1 # src tag zero?
4803: bne.b _L23_2d # no, check the remaining tags
4804: bsr.l sscale_snorm # operand is a NORM
4805: bra.b _L23_6d
4806: _L23_2d:
4807: cmpi.b %d1,&ZERO # is operand a ZERO?
4808: bne.b _L23_3d # no
4809: bsr.l sscale_szero # yes
4810: bra.b _L23_6d
4811: _L23_3d:
4812: cmpi.b %d1,&INF # is operand an INF?
4813: bne.b _L23_4d # no
4814: bsr.l sscale_sinf # yes
4815: bra.b _L23_6d
4816: _L23_4d:
4817: cmpi.b %d1,&QNAN # is operand a QNAN?
4818: bne.b _L23_5d # no
4819: bsr.l sop_sqnan # yes
4820: bra.b _L23_6d
4821: _L23_5d: # any other src tag value lands here
4822: bsr.l sscale_sdnrm # operand is a DENORM
4823: _L23_6d: # common exit
4824:
4825: #
4826: # Result is now in FP0
4827: #
4828: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4829: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4830: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4831: unlk %a6 # release the frame
4832: rts
4833:
4834: global _fscalex_ # export: ext-arg entry; tags dst and src operands and dispatches on the src tag to the sscale_s* handlers
4835: _fscalex_:
4836: link %a6,&-LOCAL_SIZE # reserve LOCAL_SIZE bytes of frame; slots below are %a6-relative
4837:
4838: movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4839: fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4840: fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4841:
4842: fmov.l &0x0,%fpcr # zero FPCR (caller's value was saved just above)
4843:
4844: #
4845: # copy, convert, and tag input argument
4846: #
4847: lea FP_DST(%a6),%a0 # a0 = &FP_DST: copy destination, still set for the tag call
4848: mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst (three longwords, copied verbatim)
4849: mov.l 0x8+0x4(%a6),0x4(%a0)
4850: mov.l 0x8+0x8(%a6),0x8(%a0)
4851: bsr.l tag # fetch operand type (comes back in d0)
4852: mov.b %d0,DTAG(%a6) # remember the dst tag in the frame (not tested in this routine)
4853:
4854: lea FP_SRC(%a6),%a0 # a0 = &FP_SRC: copy destination, still set for the tag call
4855: mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src (three longwords, copied verbatim)
4856: mov.l 0x14+0x4(%a6),0x4(%a0)
4857: mov.l 0x14+0x8(%a6),0x8(%a0)
4858: bsr.l tag # fetch operand type (comes back in d0)
4859: mov.b %d0,STAG(%a6) # remember the src tag in the frame
4860: mov.l %d0,%d1 # d1 = src tag; only its low byte is tested below
4861:
4862: andi.l &0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of USER_FPSR
4863:
4864: clr.l %d0 # upper 24 bits of d0 stay zero after the byte load
4865: mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4866:
4867: lea FP_SRC(%a6),%a0 # pass ptr to src
4868: lea FP_DST(%a6),%a1 # pass ptr to dst
4869:
4870: tst.b %d1 # src tag zero?
4871: bne.b _L23_2x # no, check the remaining tags
4872: bsr.l sscale_snorm # operand is a NORM
4873: bra.b _L23_6x
4874: _L23_2x:
4875: cmpi.b %d1,&ZERO # is operand a ZERO?
4876: bne.b _L23_3x # no
4877: bsr.l sscale_szero # yes
4878: bra.b _L23_6x
4879: _L23_3x:
4880: cmpi.b %d1,&INF # is operand an INF?
4881: bne.b _L23_4x # no
4882: bsr.l sscale_sinf # yes
4883: bra.b _L23_6x
4884: _L23_4x:
4885: cmpi.b %d1,&QNAN # is operand a QNAN?
4886: bne.b _L23_5x # no
4887: bsr.l sop_sqnan # yes
4888: bra.b _L23_6x
4889: _L23_5x: # any other src tag value lands here
4890: bsr.l sscale_sdnrm # operand is a DENORM
4891: _L23_6x: # common exit
4892:
4893: #
4894: # Result is now in FP0
4895: #
4896: movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4897: fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4898: fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 (fp0 is left holding the result)
4899: unlk %a6 # release the frame
4900: rts
4901:
4902:
4903: #########################################################################
4904: # ssin(): computes the sine of a normalized input #
4905: # ssind(): computes the sine of a denormalized input #
4906: # scos(): computes the cosine of a normalized input #
4907: # scosd(): computes the cosine of a denormalized input #
4908: # ssincos(): computes the sine and cosine of a normalized input #
4909: # ssincosd(): computes the sine and cosine of a denormalized input #
4910: # #
4911: # INPUT *************************************************************** #
4912: # a0 = pointer to extended precision input #
4913: # d0 = round precision,mode #
4914: # #
4915: # OUTPUT ************************************************************** #
4916: # fp0 = sin(X) or cos(X) #
4917: # #
4918: # For ssincos(X): #
4919: # fp0 = sin(X) #
4920: # fp1 = cos(X) #
4921: # #
4922: # ACCURACY and MONOTONICITY ******************************************* #
4923: # The returned result is within 1 ulp in 64 significant bits, i.e. #
4924: # within 0.5001 ulp to 53 bits if the result is subsequently #
4925: # rounded to double precision. The result is provably monotonic #
4926: # in double precision. #
4927: # #
4928: # ALGORITHM *********************************************************** #
4929: # #
4930: # SIN and COS: #
4931: # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
4932: # #
4933: # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
4934: # #
4935: # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4936: # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4937: # Overwrite k by k := k + AdjN. #
4938: # #
4939: # 4. If k is even, go to 6. #
4940: # #
4941: # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
4942: # Return sgn*cos(r) where cos(r) is approximated by an #
4943: # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
4944: # s = r*r. #
4945: # Exit. #
4946: # #
4947: # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
4948: # where sin(r) is approximated by an odd polynomial in r #
4949: # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
4950: # Exit. #
4951: # #
4952: # 7. If |X| > 1, go to 9. #
4953: # #
4954: # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
4955: # otherwise return 1. #
4956: # #
4957: # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4958: # go back to 3. #
4959: # #
4960: # SINCOS: #
4961: # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
4962: # #
4963: # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4964: # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4965: # #
4966: # 3. If k is even, go to 5. #
4967: # #
4968: # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
4969: # j1 exclusive or with the l.s.b. of k. #
4970: # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
4971: # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
4972: # sin(r) and cos(r) are computed as odd and even #
4973: # polynomials in r, respectively. Exit #
4974: # #
4975: # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
4976: # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
4977: # sin(r) and cos(r) are computed as odd and even #
4978: # polynomials in r, respectively. Exit #
4979: # #
4980: # 6. If |X| > 1, go to 8. #
4981: # #
4982: # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
4983: # #
4984: # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4985: # go back to 2. #
4986: # #
4987: #########################################################################
4988:
4989: SINA7: long 0xBD6AAA77,0xCCC994F5 # A7..A4: double precision (read with fmov.d/fadd.d)
4990: SINA6: long 0x3DE61209,0x7AAE8DA1
4991: SINA5: long 0xBE5AE645,0x2A118AE4
4992: SINA4: long 0x3EC71DE3,0xA5341531
4993: SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 # read with fadd.d: only the first two longs are used
4994: SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 # extended precision (read with fadd.x)
4995: SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 # extended precision (read with fadd.x)
4996:
4997: COSB8: long 0x3D2AC4D0,0xD6011EE3 # B8..B5: double precision (read with fmov.d/fadd.d)
4998: COSB7: long 0xBDA9396F,0x9F45AC19
4999: COSB6: long 0x3E21EED9,0x0612C972
5000: COSB5: long 0xBE927E4F,0xB79D9FCF
5001: COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 # read with fadd.d: only the first two longs are used
5002: COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 # extended precision (read with fadd.x)
5003: COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E # extended precision (read with fadd.x), no pad long
5004: COSB1: long 0xBF000000 # -1/2 in single precision (read with fadd.s)
5005:
5006: set INARG,FP_SCR0 # INARG, X and RPRIME all name the same FP_SCR0 slot
5007:
5008: set X,FP_SCR0
5009: # set XDCARE,X+2
5010: set XFRAC,X+4
5011:
5012: set RPRIME,FP_SCR0
5013: set SPRIME,FP_SCR1
5014:
5015: set POSNEG1,L_SCR1 # POSNEG1 and TWOTO63 share L_SCR1
5016: set TWOTO63,L_SCR1
5017:
5018: set ENDFLAG,L_SCR2 # ENDFLAG and INT share L_SCR2
5019: set INT,L_SCR2
5020:
5021: set ADJN,L_SCR3 # entry selector: 0 = ssin, 1 = scos, 4 = ssincos
5022:
5023: ############################################
5024: global ssin
5025: ssin: # sine entry: a0 = ptr to extended input, d0 = round precision,mode
5026: mov.l &0,ADJN(%a6) # ADJN = 0 selects sine
5027: bra.b SINBGN
5028:
5029: ############################################
5030: global scos
5031: scos: # cosine entry: same inputs as ssin; falls through into SINBGN
5032: mov.l &1,ADJN(%a6) # ADJN = 1 selects cosine
5033:
5034: ############################################
5035: SINBGN:
5036: #--LOAD X, SAVE IT AT X(a6), AND CHECK IF |X| IS TOO SMALL OR LARGE
5037:
5038: fmov.x (%a0),%fp0 # LOAD INPUT
5039: fmov.x %fp0,X(%a6) # save input at X
5040:
5041: # "COMPACTIFY" X: d1 = sign/exponent word in the high half, top 16 mantissa bits in the low half
5042: mov.l (%a0),%d1 # put exp in hi word
5043: mov.w 4(%a0),%d1 # fetch hi(man)
5044: and.l &0x7FFFFFFF,%d1 # strip sign
5045:
5046: cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5047: bge.b SOK1 # yes; go check the upper bound
5048: bra.w SINSM # no; input is very small
5049:
5050: SOK1:
5051: cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5052: blt.b SINMAIN # yes; use the table-driven reduction
5053: bra.w SREDUCEX # no; input is very large
5054:
5055: #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5056: #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5057: SINMAIN:
5058: fmov.x %fp0,%fp1 # fp1 = X (fp0 keeps X for the subtraction below)
5059: fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5060:
5061: lea PITBL+0x200(%pc),%a1 # a1 = N = 0 entry of the N*PI/2 table, N = -32,...,32
5062:
5063: fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER: INT = N
5064:
5065: mov.l INT(%a6),%d1 # make a copy of N
5066: asl.l &4,%d1 # N *= 16 (one table entry is 16 bytes)
5067: add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5068:
5069: # A1 IS THE ADDRESS OF N*PIBY2
5070: # ...WHICH IS IN TWO PIECES Y1 (EXTENDED) & Y2 (SINGLE)
5071: fsub.x (%a1)+,%fp0 # X-Y1; a1 now points at Y2
5072: fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5073:
5074: SINCONT:
5075: #--continuation from SREDUCEX (reached through SRESTORE); fp0 = R, INT = N
5076:
5077: #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5078: mov.l INT(%a6),%d1
5079: add.l ADJN(%a6),%d1 # d1 = N + ADJN
5080: ror.l &1,%d1 # bit 0 -> sign bit: D1 IS NEGATIVE IFF N+ADJN WAS ODD
5081: cmp.l %d1,&0
5082: blt.w COSPOLY # odd: evaluate the cosine polynomial
5083:
5084: #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED ABOVE), LET SGN := (-1)**J.
5085: #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5086: #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5087: #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5088: #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5089: #--WHERE T=S*S.
5090: #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5091: #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
5092: SINPOLY:
5093: fmovm.x &0x0c,-(%sp) # save fp2/fp3
5094:
5095: fmov.x %fp0,X(%a6) # X IS R
5096: fmul.x %fp0,%fp0 # FP0 IS S
5097:
5098: fmov.d SINA7(%pc),%fp3 # fp3 accumulates the odd-index terms (A7,A5,A3)
5099: fmov.d SINA6(%pc),%fp2 # fp2 accumulates the even-index terms (A6,A4,A2)
5100:
5101: fmov.x %fp0,%fp1
5102: fmul.x %fp1,%fp1 # FP1 IS T
5103:
5104: ror.l &1,%d1 # second rotate: bit 1 of N+ADJN reaches the sign bit
5105: and.l &0x80000000,%d1 # keep only that sign bit
5106: # ...LEAST SIG. BIT OF (N+ADJN)/2 IN SIGN POSITION
5107: eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5108:
5109: fmul.x %fp1,%fp3 # TA7
5110: fmul.x %fp1,%fp2 # TA6
5111:
5112: fadd.d SINA5(%pc),%fp3 # A5+TA7
5113: fadd.d SINA4(%pc),%fp2 # A4+TA6
5114:
5115: fmul.x %fp1,%fp3 # T(A5+TA7)
5116: fmul.x %fp1,%fp2 # T(A4+TA6)
5117:
5118: fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5119: fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5120:
5121: fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5122:
5123: fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5124: fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5125: fmul.x X(%a6),%fp0 # R'*S
5126:
5127: fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5128:
5129: fmul.x %fp1,%fp0 # SIN(R')-R'
5130:
5131: fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5132:
5133: fmov.l %d0,%fpcr # restore users round mode,prec
5134: fadd.x X(%a6),%fp0 # last inst - possible exception set
5135: bra t_inx2
5136:
5137: #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
5138: #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5139: #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5140: #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5141: #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5142: #--WHERE T=S*S.
5143: #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5144: #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5145: #--AND IS THEREFORE STORED AS SINGLE PRECISION.
5146: COSPOLY:
5147: fmovm.x &0x0c,-(%sp) # save fp2/fp3
5148:
5149: fmul.x %fp0,%fp0 # FP0 IS S
5150:
5151: fmov.d COSB8(%pc),%fp2 # fp2 accumulates the even-index terms (B8,B6,B4,B2)
5152: fmov.d COSB7(%pc),%fp3 # fp3 accumulates the odd-index terms (B7,B5,B3)
5153:
5154: fmov.x %fp0,%fp1
5155: fmul.x %fp1,%fp1 # FP1 IS T
5156:
5157: fmov.x %fp0,X(%a6) # X IS S
5158: ror.l &1,%d1 # second rotate: bit 1 of N+ADJN reaches the sign bit
5159: and.l &0x80000000,%d1 # keep only that sign bit
5160: # ...LEAST SIG. BIT OF (N+ADJN-1)/2 IN SIGN POSITION
5161:
5162: fmul.x %fp1,%fp2 # TB8
5163:
5164: eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5165: and.l &0x80000000,%d1 # d1 is already masked above; this leaves it unchanged
5166:
5167: fmul.x %fp1,%fp3 # TB7
5168:
5169: or.l &0x3F800000,%d1 # D1 IS SGN (+-1.0) IN SINGLE
5170: mov.l %d1,POSNEG1(%a6)
5171:
5172: fadd.d COSB6(%pc),%fp2 # B6+TB8
5173: fadd.d COSB5(%pc),%fp3 # B5+TB7
5174:
5175: fmul.x %fp1,%fp2 # T(B6+TB8)
5176: fmul.x %fp1,%fp3 # T(B5+TB7)
5177:
5178: fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5179: fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5180:
5181: fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5182: fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5183:
5184: fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5185: fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5186:
5187: fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5188:
5189: fadd.x %fp1,%fp0 # [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5190:
5191: fmul.x X(%a6),%fp0 # S' * (the sum above) = SGN*COS(R) - SGN
5192:
5193: fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5194:
5195: fmov.l %d0,%fpcr # restore users round mode,prec
5196: fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5197: bra t_inx2
5198:
5199: ##############################################
5200:
5201: # SINe: Big OR Small?
5202: #--IF |X| > 1, WE USE THE GENERAL ARGUMENT REDUCTION.
5203: #--IF |X| < 2**(-40), RETURN X (SIN) OR 1 (COS).
5204: SINBORS: # NOTE(review): no branch to SINBORS is visible in this part of the file
5205: cmp.l %d1,&0x3FFF8000 # compact |X| vs compact 1.0
5206: bgt.l SREDUCEX
5207:
5208: SINSM:
5209: mov.l ADJN(%a6),%d1 # 0 = sine, 1 = cosine
5210: cmp.l %d1,&0
5211: bgt.b COSTINY # cosine of a tiny argument
5212:
5213: # here, the operation may underflow iff the precision is sgl or dbl.
5214: # extended denorms are handled through another entry point.
5215: SINTINY:
5216: # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5217:
5218: fmov.l %d0,%fpcr # restore users round mode,prec
5219: mov.b &FMOV_OP,%d1 # last inst is MOVE
5220: fmov.x X(%a6),%fp0 # last inst - possible exception set; result is X itself
5221: bra t_catch
5222:
5223: COSTINY:
5224: fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5225: fmov.l %d0,%fpcr # restore users round mode,prec
5226: fadd.s &0x80800000,%fp0 # add -2**(-126); last inst - possible exception set
5227: bra t_pinx2
5228:
5229: ################################################
5230: global ssind
5231: #--SIN(X) = X FOR DENORMALIZED X
5232: ssind:
5233: bra t_extdnrm # no computation here; hand the operand straight to t_extdnrm
5234:
5235: ############################################
5236: global scosd
5237: #--COS(X) = 1 FOR DENORMALIZED X
5238: scosd:
5239: fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5240: bra t_pinx2 # exit through t_pinx2 with fp0 = 1.0
5241:
5242: ##################################################
5243:
5244: global ssincos
5245: ssincos: # sin+cos entry: a0 = ptr to extended input, d0 = round precision,mode
5246: #--SET ADJN TO 4 (SRESTORE uses ADJN >= 4 to return to SCCONT instead of SINCONT)
5247: mov.l &4,ADJN(%a6)
5248:
5249: fmov.x (%a0),%fp0 # LOAD INPUT
5250: fmov.x %fp0,X(%a6) # save input at X
5251:
5252: mov.l (%a0),%d1 # put exp in hi word
5253: mov.w 4(%a0),%d1 # fetch hi(man)
5254: and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5255:
5256: cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5257: bge.b SCOK1 # yes; go check the upper bound
5258: bra.w SCSM # no; input is very small
5259:
5260: SCOK1:
5261: cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5262: blt.b SCMAIN # yes; use the table-driven reduction
5263: bra.w SREDUCEX # no; input is very large
5264:
5265:
5266: #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5267: #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5268: SCMAIN:
5269: fmov.x %fp0,%fp1 # fp1 = X (fp0 keeps X for the subtraction below)
5270:
5271: fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5272:
5273: lea PITBL+0x200(%pc),%a1 # a1 = N = 0 entry of the N*PI/2 table, N = -32,...,32
5274:
5275: fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER: INT = N
5276:
5277: mov.l INT(%a6),%d1 # make a copy of N
5278: asl.l &4,%d1 # N *= 16 (one table entry is 16 bytes)
5279: add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5280:
5281: fsub.x (%a1)+,%fp0 # X-Y1; a1 now points at Y2
5282: fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5283:
5284: SCCONT:
5285: #--continuation point from SREDUCEX (reached through SRESTORE); fp0 = R, INT = N
5286:
5287: mov.l INT(%a6),%d1
5288: ror.l &1,%d1 # bit 0 of N -> sign bit
5289: cmp.l %d1,&0 # D1 < 0 IFF N IS ODD
5290: bge.w NEVEN
5291:
5292: SNODD:
5293: #--N IS ODD: SIN(X) = SGN1*COS(R), COS(X) = SGN2*SIN(R).
5294: fmovm.x &0x04,-(%sp) # save fp2
5295:
5296: fmov.x %fp0,RPRIME(%a6) # RPRIME = R (sign adjusted below)
5297: fmul.x %fp0,%fp0 # FP0 IS S = R*R
5298: fmov.d SINA7(%pc),%fp1 # A7
5299: fmov.d COSB8(%pc),%fp2 # B8
5300: fmul.x %fp0,%fp1 # SA7
5301: fmul.x %fp0,%fp2 # SB8
5302:
5303: mov.l %d2,-(%sp) # save d2 while it is used as scratch
5304: mov.l %d1,%d2
5305: ror.l &1,%d2 # bit 1 of N -> sign bit of d2
5306: and.l &0x80000000,%d2
5307: eor.l %d1,%d2 # xor with the sign of d1 (bit 0 of N)
5308: and.l &0x80000000,%d2 # d2 = sign bit for SGN2
5309:
5310: fadd.d SINA6(%pc),%fp1 # A6+SA7
5311: fadd.d COSB7(%pc),%fp2 # B7+SB8
5312:
5313: fmul.x %fp0,%fp1 # S(A6+SA7)
5314: eor.l %d2,RPRIME(%a6) # RPRIME = R' = SGN2*R
5315: mov.l (%sp)+,%d2 # restore d2
5316: fmul.x %fp0,%fp2 # S(B7+SB8)
5317: ror.l &1,%d1 # bit 1 of N -> sign bit of d1
5318: and.l &0x80000000,%d1 # d1 = sign bit for SGN1
5319: mov.l &0x3F800000,POSNEG1(%a6) # POSNEG1 = 1.0 in single
5320: eor.l %d1,POSNEG1(%a6) # POSNEG1 = SGN1 (+-1.0)
5321:
5322: fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5323: fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5324:
5325: fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5326: fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5327: fmov.x %fp0,SPRIME(%a6) # SPRIME = S (sign adjusted below)
5328:
5329: fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5330: eor.l %d1,SPRIME(%a6) # SPRIME = S' = SGN1*S
5331: fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5332:
5333: fmul.x %fp0,%fp1 # S(A4+...)
5334: fmul.x %fp0,%fp2 # S(B5+...)
5335:
5336: fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5337: fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5338:
5339: fmul.x %fp0,%fp1 # S(A3+...)
5340: fmul.x %fp0,%fp2 # S(B4+...)
5341:
5342: fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5343: fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5344:
5345: fmul.x %fp0,%fp1 # S(A2+...)
5346: fmul.x %fp0,%fp2 # S(B3+...)
5347:
5348: fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5349: fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5350:
5351: fmul.x %fp0,%fp1 # S(A1+...)
5352: fmul.x %fp2,%fp0 # S(B2+...)
5353:
5354: fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5355: fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5356: fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5357:
5358: fmovm.x (%sp)+,&0x20 # restore fp2
5359:
5360: fmov.l %d0,%fpcr # restore users round mode,prec
5361: fadd.x RPRIME(%a6),%fp1 # COS(X)
5362: bsr sto_cos # store cosine result
5363: fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5364: bra t_inx2
5365:
5366: NEVEN:
5367: #--N IS EVEN: SIN(X) = SGN1*SIN(R), COS(X) = SGN1*COS(R).
5368: fmovm.x &0x04,-(%sp) # save fp2
5369:
5370: fmov.x %fp0,RPRIME(%a6) # RPRIME = R (sign adjusted below)
5371: fmul.x %fp0,%fp0 # FP0 IS S = R*R
5372:
5373: fmov.d COSB8(%pc),%fp1 # B8
5374: fmov.d SINA7(%pc),%fp2 # A7
5375:
5376: fmul.x %fp0,%fp1 # SB8
5377: fmov.x %fp0,SPRIME(%a6) # SPRIME = S (sign adjusted below)
5378: fmul.x %fp0,%fp2 # SA7
5379:
5380: ror.l &1,%d1 # bit 1 of N -> sign bit of d1
5381: and.l &0x80000000,%d1 # d1 = sign bit for SGN1
5382:
5383: fadd.d COSB7(%pc),%fp1 # B7+SB8
5384: fadd.d SINA6(%pc),%fp2 # A6+SA7
5385:
5386: eor.l %d1,RPRIME(%a6) # RPRIME = R' = SGN1*R
5387: eor.l %d1,SPRIME(%a6) # SPRIME = S' = SGN1*S
5388:
5389: fmul.x %fp0,%fp1 # S(B7+SB8)
5390:
5391: or.l &0x3F800000,%d1 # d1 = SGN1 (+-1.0) in single
5392: mov.l %d1,POSNEG1(%a6)
5393:
5394: fmul.x %fp0,%fp2 # S(A6+SA7)
5395:
5396: fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5397: fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5398:
5399: fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5400: fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5401:
5402: fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5403: fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5404:
5405: fmul.x %fp0,%fp1 # S(B5+...)
5406: fmul.x %fp0,%fp2 # S(A4+...)
5407:
5408: fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5409: fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5410:
5411: fmul.x %fp0,%fp1 # S(B4+...)
5412: fmul.x %fp0,%fp2 # S(A3+...)
5413:
5414: fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5415: fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5416:
5417: fmul.x %fp0,%fp1 # S(B3+...)
5418: fmul.x %fp0,%fp2 # S(A2+...)
5419:
5420: fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5421: fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5422:
5423: fmul.x %fp0,%fp1 # S(B2+...)
5424: fmul.x %fp2,%fp0 # S(A1+...)
5425:
5426:
5427: fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5428: fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5429: fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5430:
5431: fmovm.x (%sp)+,&0x20 # restore fp2
5432:
5433: fmov.l %d0,%fpcr # restore users round mode,prec
5434: fadd.s POSNEG1(%a6),%fp1 # COS(X)
5435: bsr sto_cos # store cosine result
5436: fadd.x RPRIME(%a6),%fp0 # SIN(X)
5437: bra t_inx2
5438:
5439: ################################################
5440:
5441: SCBORS: # NOTE(review): no branch to SCBORS is visible in this part of the file
5442: cmp.l %d1,&0x3FFF8000 # compact |X| vs compact 1.0
5443: bgt.w SREDUCEX # |X| > 1: general argument reduction
5444:
5445: ################################################
5446:
5447: SCSM: # tiny argument: COS(X) from 1.0, SIN(X) = X
5448: # mov.w &0x0000,XDCARE(%a6)
5449: fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5450:
5451: fmov.l %d0,%fpcr # restore users round mode,prec
5452: fsub.s &0x00800000,%fp1 # subtract 2**(-126) from the 1.0 in fp1
5453: bsr sto_cos # store cosine result
5454: fmov.l %fpcr,%d0 # d0 must have fpcr,too
5455: mov.b &FMOV_OP,%d1 # last inst is MOVE
5456: fmov.x X(%a6),%fp0 # fp0 = X, the sine result
5457: bra t_catch
5458:
5459: ##############################################
5460:
5461: global ssincosd
5462: #--SIN AND COS OF X FOR DENORMALIZED X: COS(X) = 1.0 IS STORED VIA sto_cos
5463: ssincosd:
5464: mov.l %d0,-(%sp) # save d0
5465: fmov.s &0x3F800000,%fp1 # fp1 = 1.0, the cosine result
5466: bsr sto_cos # store cosine result
5467: mov.l (%sp)+,%d0 # restore d0
5468: bra t_extdnrm # the sine side is left to t_extdnrm
5469:
5470: ############################################
5471:
5472: #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5473: #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5474: #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5475: SREDUCEX: # in: fp0 = X, d1 = compact form of |X|
5476: fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5477: mov.l %d2,-(%sp) # save d2
5478: fmov.s &0x00000000,%fp1 # fp1 = 0
5479:
5480: #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5481: #--there is a danger of unwanted overflow in first LOOP iteration. In this
5482: #--case, reduce argument by one remainder step to make subsequent reduction
5483: #--safe.
5484: cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5485: bne.b SLOOP # no
5486:
5487: # yes; create 2**16383*PI/2
5488: mov.w &0x7ffe,FP_SCR0_EX(%a6)
5489: mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5490: clr.l FP_SCR0_LO(%a6)
5491:
5492: # create low half of 2**16383*PI/2 at FP_SCR1
5493: mov.w &0x7fdc,FP_SCR1_EX(%a6)
5494: mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5495: clr.l FP_SCR1_LO(%a6)
5496:
5497: ftest.x %fp0 # test sign of argument
5498: fblt.w sred_neg # negative arg: keep the constants positive
5499:
5500: or.b &0x80,FP_SCR0_EX(%a6) # positive arg: set the sign bit so the adds below subtract
5501: or.b &0x80,FP_SCR1_EX(%a6)
5502: sred_neg:
5503: fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5504: fmov.x %fp0,%fp1 # save high result in fp1
5505: fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5506: fsub.x %fp0,%fp1 # determine low component of result
5507: fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5508:
5509: #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5510: #--integer quotient will be stored in N
5511: #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5512: SLOOP:
5513: fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5514: mov.w INARG(%a6),%d1 # d1.w = sign/exponent word of the current remainder
5515: mov.l %d1,%a1 # save a copy of D1 (its sign bit is used in SWORK)
5516: and.l &0x00007FFF,%d1 # drop the sign and the stale upper word
5517: sub.l &0x00003FFF,%d1 # d1 = K
5518: cmp.l %d1,&28
5519: ble.b SLASTLOOP # K <= 28: this is the final pass
5520: SCONTLOOP:
5521: sub.l &27,%d1 # d1 = L := K-27
5522: mov.b &0,ENDFLAG(%a6) # another pass will follow
5523: bra.b SWORK
5524: SLASTLOOP:
5525: clr.l %d1 # d1 = L := 0
5526: mov.b &1,ENDFLAG(%a6) # mark the final pass
5527:
5528: SWORK:
5529: #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5530: #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5531:
5532: #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5533: #--2**L * (PIby2_1), 2**L * (PIby2_2)
5534:
5535: mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5536: sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5537:
5538: mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5539: mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5540: mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5541:
5542: fmov.x %fp0,%fp2
5543: fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5544:
5545: #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5546: #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5547: #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5548: #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5549: #--US THE DESIRED VALUE IN FLOATING POINT.
5550: mov.l %a1,%d2 # d2 = saved sign/exponent word
5551: swap %d2 # move it to the upper half
5552: and.l &0x80000000,%d2 # keep only the sign bit
5553: or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5554: mov.l %d2,TWOTO63(%a6)
5555: fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5556: fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5557: # fint.x %fp2
5558:
5559: #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5560: mov.l %d1,%d2 # d2 = L
5561:
5562: add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5563: mov.w %d2,FP_SCR0_EX(%a6)
5564: mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5565: clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5566:
5567: add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
5568: mov.w %d1,FP_SCR1_EX(%a6)
5569: mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5570: clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5571:
5572: mov.b ENDFLAG(%a6),%d1 # d1.b = 1 on the final pass, else 0
5573:
5574: #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5575: #--P2 = 2**(L) * Piby2_2
5576: fmov.x %fp2,%fp4 # fp4 = N
5577: fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5578: fmov.x %fp2,%fp5 # fp5 = N
5579: fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5580: fmov.x %fp4,%fp3 # fp3 = W = N*P1
5581:
5582: #--we want P+p = W+w but |p| <= half ulp of P
5583: #--Then, we need to compute A := R-P and a := r-p
5584: fadd.x %fp5,%fp3 # fp3 = P
5585: fsub.x %fp3,%fp4 # fp4 = W-P
5586:
5587: fsub.x %fp3,%fp0 # fp0 = A := R - P
5588: fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5589:
5590: fmov.x %fp0,%fp3 # fp3 = A
5591: fsub.x %fp4,%fp1 # fp1 = a := r - p
5592:
5593: #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5594: #--|r| <= half ulp of R.
5595: fadd.x %fp1,%fp0 # fp0 = R := A+a
5596: #--No need to calculate r if this is the last loop
5597: cmp.b %d1,&0
5598: bgt.w SRESTORE # final pass: done
5599:
5600: #--Need to calculate r
5601: fsub.x %fp0,%fp3 # fp3 = A-R
5602: fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5603: bra.w SLOOP
5604:
5605: SRESTORE: # reduction finished: fp0 = R, fp2 = N
5606: fmov.l %fp2,INT(%a6) # INT = N as an integer (INT shares its slot with ENDFLAG, already consumed)
5607: mov.l (%sp)+,%d2 # restore d2
5608: fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5609:
5610: mov.l ADJN(%a6),%d1 # 0 = sin, 1 = cos, 4 = sincos
5611: cmp.l %d1,&4
5612:
5613: blt.w SINCONT # sin or cos: back to the single-result path
5614: bra.w SCCONT # sincos: back to the two-result path
5615:
5616: #########################################################################
5617: # stan(): computes the tangent of a normalized input #
5618: # stand(): computes the tangent of a denormalized input #
5619: # #
5620: # INPUT *************************************************************** #
5621: # a0 = pointer to extended precision input #
5622: # d0 = round precision,mode #
5623: # #
5624: # OUTPUT ************************************************************** #
5625: # fp0 = tan(X) #
5626: # #
5627: # ACCURACY and MONOTONICITY ******************************************* #
5628: # The returned result is within 3 ulp in 64 significant bits, i.e. #
5629: # within 0.5001 ulp to 53 bits if the result is subsequently #
5630: # rounded to double precision. The result is provably monotonic #
5631: # in double precision. #
5632: # #
5633: # ALGORITHM *********************************************************** #
5634: # #
5635: # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5636: # #
5637: # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5638: # k = N mod 2, so in particular, k = 0 or 1. #
5639: # #
5640: # 3. If k is odd, go to 5. #
5641: # #
5642: # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5643: # rational function U/V where #
5644: # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5645: # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5646: # Exit. #
5647: # #
5648: # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5649: # a rational function U/V where #
5650: # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5651: # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5652: # -Cot(r) = -V/U. Exit. #
5653: # #
5654: # 6. If |X| > 1, go to 8. #
5655: # #
5656: # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5657: # #
5658: # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5659: # to 2. #
5660: # #
5661: #########################################################################
5662:
5663: TANQ4: # double precision (read with fmov.d)
5664: long 0x3EA0B759,0xF50F8688
5665: TANP3: # double precision (read with fmov.d)
5666: long 0xBEF2BAA5,0xA8924F04
5667:
5668: TANQ3: # read with fadd.d: only the first two longs are used
5669: long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5670:
5671: TANP2: # extended precision (read with fadd.x)
5672: long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5673:
5674: TANQ2: # extended precision (read with fadd.x)
5675: long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5676:
5677: TANP1: # extended precision (read with fadd.x)
5678: long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5679:
5680: TANQ1: # extended precision (read with fadd.x)
5681: long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5682:
5683: INVTWOPI: # NOTE(review): no reference to INVTWOPI is visible in this part of the file
5684: long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5685:
5686: TWOPI1: # NOTE(review): no reference to TWOPI1/TWOPI2 is visible in this part of the file
5687: long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5688: TWOPI2:
5689: long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5690:
5691: #--N*PI/2, -32 <= N <= 32, 16 BYTES PER ENTRY: A LEADING TERM IN EXT.
5692: #--(3 LONGS) AND A TRAILING TERM IN SGL (1 LONG); N = 0 IS AT PITBL+0x200.
5693: #--NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT MOST 69 BITS LONG.
5694: # global PITBL
5695: PITBL:
5696: long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5697: long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5698: long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5699: long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5700: long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5701: long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5702: long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5703: long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5704: long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5705: long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5706: long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5707: long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5708: long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5709: long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5710: long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5711: long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5712: long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5713: long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5714: long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5715: long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5716: long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5717: long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5718: long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5719: long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5720: long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5721: long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5722: long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5723: long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5724: long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5725: long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5726: long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5727: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5728: long 0x00000000,0x00000000,0x00000000,0x00000000
5729: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5730: long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5731: long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5732: long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5733: long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5734: long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5735: long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5736: long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5737: long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5738: long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5739: long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5740: long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5741: long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5742: long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5743: long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5744: long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5745: long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5746: long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5747: long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5748: long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5749: long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5750: long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5751: long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5752: long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5753: long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5754: long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5755: long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5756: long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5757: long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5758: long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5759: long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5760: long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5761:
5762: set INARG,FP_SCR0 # same slot as in the sin/cos section
5763:
5764: set TWOTO63,L_SCR1 # TWOTO63 and INT share L_SCR1 from here on
5765: set INT,L_SCR1 # NOTE(review): the sin/cos section set INT to L_SCR2; confirm the re-assignment is intended
5766: set ENDFLAG,L_SCR2
5767:
5768: global stan
5769: stan: # tangent entry: a0 = ptr to extended input, d0 = round precision,mode
5770: fmov.x (%a0),%fp0 # LOAD INPUT
5771:
5772: mov.l (%a0),%d1 # put exp in hi word
5773: mov.w 4(%a0),%d1 # fetch hi(man)
5774: and.l &0x7FFFFFFF,%d1 # strip sign: d1 = compact form of |X|
5775:
5776: cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5777: bge.b TANOK1 # yes; go check the upper bound
5778: bra.w TANSM # no; input is very small
5779: TANOK1:
5780: cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5781: blt.b TANMAIN # yes; use the table-driven reduction
5782: bra.w REDUCEX # no; input is very large
5783:
5784: TANMAIN:
5785: #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5786: #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5787: fmov.x %fp0,%fp1 # fp1 = X (fp0 keeps X for the subtraction below)
5788: fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5789:
5790: lea.l PITBL+0x200(%pc),%a1 # a1 = N = 0 entry of the N*PI/2 table, N = -32,...,32
5791:
5792: fmov.l %fp1,%d1 # CONVERT TO INTEGER: d1 = N
5793:
5794: asl.l &4,%d1 # N *= 16 (one table entry is 16 bytes)
5795: add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5796:
5797: fsub.x (%a1)+,%fp0 # X-Y1; a1 now points at Y2
5798:
5799: fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5800:
5801: ror.l &5,%d1 # bit 4 of N*16 (= bit 0 of N) -> sign bit
5802: and.l &0x80000000,%d1 # D1 < 0 IFF N WAS ODD
5803:
5804: TANCONT: # in: fp0 = R, d1 negative iff N is odd
5805: fmovm.x &0x0c,-(%sp) # save fp2,fp3
5806:
5807: cmp.l %d1,&0
5808: blt.w NODD # N odd: compute -V/U instead
5809:
5810: fmov.x %fp0,%fp1 # N even: tan(r) = U/V; fp0 keeps R
5811: fmul.x %fp1,%fp1 # S = R*R
5812:
5813: fmov.d TANQ4(%pc),%fp3 # fp3 accumulates the Q (denominator) polynomial
5814: fmov.d TANP3(%pc),%fp2 # fp2 accumulates the P (numerator) polynomial
5815:
5816: fmul.x %fp1,%fp3 # SQ4
5817: fmul.x %fp1,%fp2 # SP3
5818:
5819: fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5820: fadd.x TANP2(%pc),%fp2 # P2+SP3
5821:
5822: fmul.x %fp1,%fp3 # S(Q3+SQ4)
5823: fmul.x %fp1,%fp2 # S(P2+SP3)
5824:
5825: fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5826: fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5827:
5828: fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5829: fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5830:
5831: fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5832: fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5833:
5834: fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5835:
5836: fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) = U
5837:
5838: fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) = V
5839:
5840: fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5841:
5842: fmov.l %d0,%fpcr # restore users round mode,prec
5843: fdiv.x %fp1,%fp0 # U/V; last inst - possible exception set
5844: bra t_inx2
5845:
5846: NODD:
5847: fmov.x %fp0,%fp1
5848: fmul.x %fp0,%fp0 # S = R*R
5849:
5850: fmov.d TANQ4(%pc),%fp3
5851: fmov.d TANP3(%pc),%fp2
5852:
5853: fmul.x %fp0,%fp3 # SQ4
5854: fmul.x %fp0,%fp2 # SP3
5855:
5856: fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5857: fadd.x TANP2(%pc),%fp2 # P2+SP3
5858:
5859: fmul.x %fp0,%fp3 # S(Q3+SQ4)
5860: fmul.x %fp0,%fp2 # S(P2+SP3)
5861:
5862: fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5863: fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5864:
5865: fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5866: fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5867:
5868: fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5869: fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5870:
5871: fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5872:
5873: fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5874: fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5875:
5876: fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5877:
5878: fmov.x %fp1,-(%sp)
5879: eor.l &0x80000000,(%sp)
5880:
5881: fmov.l %d0,%fpcr # restore users round mode,prec
5882: fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5883: bra t_inx2
5884:
5885: TANBORS:
5886: #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5887: #--IF |X| < 2**(-40), RETURN X OR 1.
5888: cmp.l %d1,&0x3FFF8000
5889: bgt.b REDUCEX
5890:
5891: TANSM:
5892: fmov.x %fp0,-(%sp)
5893: fmov.l %d0,%fpcr # restore users round mode,prec
5894: mov.b &FMOV_OP,%d1 # last inst is MOVE
5895: fmov.x (%sp)+,%fp0 # last inst - posibble exception set
5896: bra t_catch
5897:
#--stand: entry point for a denormalized operand. All work is delegated to
#--t_extdnrm (defined elsewhere in this file).
5898: global stand
5899: #--TAN(X) = X FOR DENORMALIZED X
5900: stand:
5901: bra t_extdnrm
5902:
#--REDUCEX: general argument reduction for stan, entered with X in fp0 and
#--the compact form of |X| in d1. Saves fp2-fp5 and d2 on the stack, uses
#--a1 and the FP_SCR0/FP_SCR1/L_SCR1/L_SCR2 scratch slots, and leaves
#--through RESTORE to TANCONT with the remainder in fp0 and the parity of
#--the integer quotient N in bit 31 of d1.
5903: #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5904: #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5905: #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5906: REDUCEX:
5907: fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5908: mov.l %d2,-(%sp) # save d2
5909: fmov.s &0x00000000,%fp1 # fp1 = 0
5910:
5911: #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5912: #--there is a danger of unwanted overflow in first LOOP iteration. In this
5913: #--case, reduce argument by one remainder step to make subsequent reduction
5914: #--safe.
5915: cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5916: bne.b LOOP # no
5917:
5918: # yes; create 2**16383*PI/2
5919: mov.w &0x7ffe,FP_SCR0_EX(%a6)
5920: mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5921: clr.l FP_SCR0_LO(%a6)
5922:
5923: # create low half of 2**16383*PI/2 at FP_SCR1
5924: mov.w &0x7fdc,FP_SCR1_EX(%a6)
5925: mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5926: clr.l FP_SCR1_LO(%a6)
5927:
5928: ftest.x %fp0 # test sign of argument
5929: fblt.w red_neg
5930:
#--For a positive argument both constants get their sign bit set, so the
#--fadd instructions below subtract them; for a negative argument they are
#--left positive and are added.
5931: or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5932: or.b &0x80,FP_SCR1_EX(%a6)
5933: red_neg:
5934: fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5935: fmov.x %fp0,%fp1 # save high result in fp1
5936: fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5937: fsub.x %fp0,%fp1 # determine low component of result
5938: fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5939:
5940: #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5941: #--integer quotient will be stored in N
5942: #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5943: LOOP:
5944: fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5945: mov.w INARG(%a6),%d1
5946: mov.l %d1,%a1 # save a copy of d1 (sign/exponent word)
5947: and.l &0x00007FFF,%d1
5948: sub.l &0x00003FFF,%d1 # d1 = K
5949: cmp.l %d1,&28
5950: ble.b LASTLOOP
5951: CONTLOOP:
5952: sub.l &27,%d1 # d1 = L := K-27
5953: mov.b &0,ENDFLAG(%a6)
5954: bra.b WORK
5955: LASTLOOP:
5956: clr.l %d1 # d1 = L := 0
5957: mov.b &1,ENDFLAG(%a6)
5958:
5959: WORK:
5960: #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5961: #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5962:
5963: #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5964: #--2**L * (PIby2_1), 2**L * (PIby2_2)
5965:
5966: mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5967: sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5968:
5969: mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5970: mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5971: mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5972:
5973: fmov.x %fp0,%fp2
5974: fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5975:
5976: #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5977: #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5978: #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5979: #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5980: #--US THE DESIRED VALUE IN FLOATING POINT.
#--The saved exponent word sits in the low half of a1; swap moves it to the
#--high half so that the mask below isolates the sign of INARG in bit 31.
5981: mov.l %a1,%d2
5982: swap %d2
5983: and.l &0x80000000,%d2
5984: or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5985: mov.l %d2,TWOTO63(%a6)
5986: fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5987: fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5988: # fintrz.x %fp2,%fp2
5989:
5990: #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5991: mov.l %d1,%d2 # d2 = L
5992:
5993: add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5994: mov.w %d2,FP_SCR0_EX(%a6)
5995: mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5996: clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5997:
5998: add.l &0x00003FDD,%d1
5999: mov.w %d1,FP_SCR1_EX(%a6)
6000: mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6001: clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6002:
#--L is no longer needed; d1 is reused for the end-of-loop flag tested below.
6003: mov.b ENDFLAG(%a6),%d1
6004:
6005: #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6006: #--P2 = 2**(L) * Piby2_2
6007: fmov.x %fp2,%fp4 # fp4 = N
6008: fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6009: fmov.x %fp2,%fp5 # fp5 = N
6010: fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6011: fmov.x %fp4,%fp3 # fp3 = W = N*P1
6012:
6013: #--we want P+p = W+w but |p| <= half ulp of P
6014: #--Then, we need to compute A := R-P and a := r-p
6015: fadd.x %fp5,%fp3 # fp3 = P
6016: fsub.x %fp3,%fp4 # fp4 = W-P
6017:
6018: fsub.x %fp3,%fp0 # fp0 = A := R - P
6019: fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6020:
6021: fmov.x %fp0,%fp3 # fp3 = A
6022: fsub.x %fp4,%fp1 # fp1 = a := r - p
6023:
6024: #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6025: #--|r| <= half ulp of R.
6026: fadd.x %fp1,%fp0 # fp0 = R := A+a
6027: #--No need to calculate r if this is the last loop
6028: cmp.b %d1,&0
6029: bgt.w RESTORE
6030:
6031: #--Need to calculate r
6032: fsub.x %fp0,%fp3 # fp3 = A-R
6033: fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6034: bra.w LOOP
6035:
#--Last pass done: store N as an integer, undo the register saves in the
#--reverse order of REDUCEX's pushes, and rotate bit 0 of N into bit 31 of d1,
#--which is the sign test TANCONT uses to tell odd N from even N.
6036: RESTORE:
6037: fmov.l %fp2,INT(%a6)
6038: mov.l (%sp)+,%d2 # restore d2
6039: fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6040:
6041: mov.l INT(%a6),%d1
6042: ror.l &1,%d1
6043:
6044: bra.w TANCONT
6045:
6046: #########################################################################
6047: # satan(): computes the arctangent of a normalized number #
6048: # satand(): computes the arctangent of a denormalized number #
6049: # #
6050: # INPUT *************************************************************** #
6051: # a0 = pointer to extended precision input #
6052: # d0 = round precision,mode #
6053: # #
6054: # OUTPUT ************************************************************** #
6055: # fp0 = arctan(X) #
6056: # #
6057: # ACCURACY and MONOTONICITY ******************************************* #
6058: # The returned result is within 2 ulps in 64 significant bit, #
6059: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6060: # rounded to double precision. The result is provably monotonic #
6061: # in double precision. #
6062: # #
6063: # ALGORITHM *********************************************************** #
6064: # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6065: # #
6066: # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6067: # Note that k = -4, -3,..., or 3. #
6068: # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6069: # significant bits of X with a bit-1 attached at the 6-th #
6070: # bit position. Define u to be u = (X-F) / (1 + X*F). #
6071: # #
6072: # Step 3. Approximate arctan(u) by a polynomial poly. #
6073: # #
6074: # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6075: # table of values calculated beforehand. Exit. #
6076: # #
6077: # Step 5. If |X| >= 16, go to Step 7. #
6078: # #
6079: # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6080: # #
6081: # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6082: # polynomial in X'. #
6083: # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6084: # #
6085: #########################################################################
6086:
#--Polynomial coefficients and boundary constants for satan.
#--ATANA*, ATANB* and ATANC* are two longs each and are read by satan with
#--.d (double precision) operand size.
#--ATANA1-ATANA3: rearranged coefficients for the ATANMAIN polynomial
#--U + A1*U*V*(A2 + V*(A3 + V)).
6087: ATANA3: long 0xBFF6687E,0x314987D8
6088: ATANA2: long 0x4002AC69,0x34A26DB3
6089: ATANA1: long 0xBFC2476F,0x4E1DA28E
6090:
#--ATANB1-ATANB6: coefficients of the ATANSM polynomial (|X| below 1/16).
6091: ATANB6: long 0x3FB34444,0x7F876989
6092: ATANB5: long 0xBFB744EE,0x7FAF45DB
6093: ATANB4: long 0x3FBC71C6,0x46940220
6094: ATANB3: long 0xBFC24924,0x921872F9
6095: ATANB2: long 0x3FC99999,0x99998FA9
6096: ATANB1: long 0xBFD55555,0x55555555
6097:
#--ATANC1-ATANC5: coefficients of the ATANBIG polynomial in X' = -1/X.
6098: ATANC5: long 0xBFB70BF3,0x98539E6A
6099: ATANC4: long 0x3FBC7187,0x962D1D7D
6100: ATANC3: long 0xBFC24924,0x827107B8
6101: ATANC2: long 0x3FC99999,0x9996263E
6102: ATANC1: long 0xBFD55555,0x55555536
6103:
#--+PI/2 and -PI/2 in extended format (read with .x); the fourth long pads
#--each entry to 16 bytes.
6104: PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6105: NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6106:
#--+/-2**(-16382) in extended format (biased exponent 0x0001, mantissa
#--0x80000000_00000000); ATANHUGE adds one of these to +/-PI/2.
6107: PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6108: NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6109:
#--ATANTBL: table of ATAN(|F|) read by satan's ATANMAIN path.
#--128 entries of 16 bytes each; satan copies the first three longs of an
#--entry (sign/exponent, mantissa high, mantissa low) into ATANF, the fourth
#--long is zero padding. The byte offset computed by ATANMAIN is
#--256*(K+4) + 16*J, where K is the unbiased exponent of X (K+4 = 0..7) and
#--J is the 4-bit field taken from the fraction bits just below the leading
#--mantissa bit.
6110: ATANTBL:
6111: long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6112: long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6113: long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6114: long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6115: long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6116: long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6117: long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6118: long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6119: long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6120: long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6121: long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6122: long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6123: long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6124: long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6125: long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6126: long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6127: long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6128: long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6129: long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6130: long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6131: long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6132: long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6133: long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6134: long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6135: long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6136: long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6137: long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6138: long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6139: long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6140: long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6141: long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6142: long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6143: long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6144: long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6145: long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6146: long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6147: long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6148: long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6149: long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6150: long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6151: long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6152: long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6153: long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6154: long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6155: long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6156: long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6157: long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6158: long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6159: long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6160: long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6161: long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6162: long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6163: long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6164: long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6165: long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6166: long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6167: long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6168: long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6169: long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6170: long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6171: long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6172: long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6173: long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6174: long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6175: long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6176: long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6177: long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6178: long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6179: long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6180: long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6181: long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6182: long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6183: long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6184: long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6185: long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6186: long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6187: long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6188: long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6189: long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6190: long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6191: long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6192: long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6193: long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6194: long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6195: long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6196: long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6197: long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6198: long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6199: long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6200: long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6201: long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6202: long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6203: long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6204: long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6205: long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6206: long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6207: long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6208: long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6209: long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6210: long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6211: long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6212: long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6213: long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6214: long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6215: long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6216: long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6217: long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6218: long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6219: long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6220: long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6221: long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6222: long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6223: long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6224: long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6225: long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6226: long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6227: long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6228: long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6229: long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6230: long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6231: long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6232: long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6233: long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6234: long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6235: long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6236: long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6237: long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6238: long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6239:
#--Scratch aliases used by satan.
#--X is a 12-byte extended slot: sign/exponent word at X, the word at X+2
#--(XDCARE), mantissa high long at X+4 (XFRAC), mantissa low long at X+8
#--(XFRACLO). ATANMAIN rewrites XFRAC/XFRACLO in place to turn X into F.
6240: set X,FP_SCR0
6241: set XDCARE,X+2
6242: set XFRAC,X+4
6243: set XFRACLO,X+8
6244:
#--ATANF receives SIGN(F)*ATAN(|F|) copied out of ATANTBL; ATANFHI/ATANFLO
#--are its mantissa high and low longs.
6245: set ATANF,FP_SCR1
6246: set ATANFHI,ATANF+4
6247: set ATANFLO,ATANF+8
6248:
#--satan: arctangent of the extended-precision operand at (%a0), result in
#--%fp0. %d0 holds the caller's rounding mode/precision and is written to
#--%fpcr just before the final arithmetic instruction of each path.
#--%d1 is scratch (compact form of |X|: sign/exponent word in the upper 16
#--bits, top 16 mantissa bits in the lower 16 bits, sign cleared); %a1 is
#--scratch on the table path. fp2/fp3 and d2 are saved and restored
#--wherever they are used. Dispatch by magnitude:
#--  |X| in [1/16,16)      -> ATANMAIN (table + short polynomial)
#--  2^(-40) <= |X| < 1/16 -> ATANSM polynomial
#--  |X| < 2^(-40)         -> ATANTINY (result is X)
#--  16 <= |X| <= 2^100    -> ATANBIG (sign*PI/2 + atan(-1/X))
#--  |X| > 2^100           -> ATANHUGE (sign*(PI/2 - tiny))
6249: global satan
6250: #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6251: satan:
6252: fmov.x (%a0),%fp0 # LOAD INPUT
6253:
6254: mov.l (%a0),%d1
6255: mov.w 4(%a0),%d1
6256: fmov.x %fp0,X(%a6)
6257: and.l &0x7FFFFFFF,%d1
6258:
6259: cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6260: bge.b ATANOK1
6261: bra.w ATANSM
6262:
6263: ATANOK1:
6264: cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6265: ble.b ATANMAIN
6266: bra.w ATANBIG
6267:
6268: #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6269: #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6270: #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6271: #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6272: #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6273: #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6274: #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6275: #--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
6276: #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6277: #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
6278: #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6279: #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6280: #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6281:
6282: #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6283: #--WE CHOSE F TO BE +-2^K * 1.BBBB1
6284: #--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6285: #--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6286: #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6287: #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6288:
6289: ATANMAIN:
6290:
6291: and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6292: or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6293: mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6294:
6295: fmov.x %fp0,%fp1 # FP1 IS X
6296: fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6297: fsub.x X(%a6),%fp0 # FP0 IS X-F
6298: fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6299: fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6300:
6301: #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6302: #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6303: #--SAVE REGISTERS FP2.
6304:
#--Index arithmetic on the compact form still held in d1: the masked fraction
#--field contributes 16*J after the final shift by 7, and the exponent
#--difference (K+4)<<16, halved and then shifted by 7, contributes 256*(K+4).
#--The sum is the byte offset of a 16-byte ATANTBL entry.
6305: mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6306: mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6307: and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6308: and.l &0x7FFF0000,%d2 # EXPONENT OF F
6309: sub.l &0x3FFB0000,%d2 # K+4
6310: asr.l &1,%d2
6311: add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6312: asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6313: lea ATANTBL(%pc),%a1
6314: add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6315: mov.l (%a1)+,ATANF(%a6)
6316: mov.l (%a1)+,ATANFHI(%a6)
6317: mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6318: mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6319: and.l &0x80000000,%d1 # SIGN(F)
6320: or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6321: mov.l (%sp)+,%d2 # RESTORE d2
6322:
6323: #--THAT'S ALL I HAVE TO DO FOR NOW,
6324: #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6325:
6326: #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6327: #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6328: #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6329: #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6330: #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6331: #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6332: #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6333:
6334: fmovm.x &0x04,-(%sp) # save fp2
6335:
6336: fmov.x %fp0,%fp1
6337: fmul.x %fp1,%fp1 # FP1 IS V = U*U
6338: fmov.d ATANA3(%pc),%fp2
6339: fadd.x %fp1,%fp2 # A3+V
6340: fmul.x %fp1,%fp2 # V*(A3+V)
6341: fmul.x %fp0,%fp1 # U*V
6342: fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6343: fmul.d ATANA1(%pc),%fp1 # A1*U*V
6344: fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6345: fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6346:
6347: fmovm.x (%sp)+,&0x20 # restore fp2
6348:
6349: fmov.l %d0,%fpcr # restore users rnd mode,prec
6350: fadd.x ATANF(%a6),%fp0 # ATAN(X)
6351: bra t_inx2
6352:
#--NOTE(review): no branch to ATANBORS is visible in this part of the file
#--and the preceding instruction is an unconditional branch; this label looks
#--unreachable. The register names in the comment below appear to predate
#--the current register assignment (the compare uses d1). Confirm before
#--relying on either.
6353: ATANBORS:
6354: #--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
6355: #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6356: cmp.l %d1,&0x3FFF8000
6357: bgt.w ATANBIG # I.E. |X| >= 16
6358:
6359: ATANSM:
6360: #--|X| <= 1/16
6361: #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6362: #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6363: #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
6364: #--WHERE Y = X*X, AND Z = Y*Y.
6365:
6366: cmp.l %d1,&0x3FD78000
6367: blt.w ATANTINY
6368:
6369: #--COMPUTE POLYNOMIAL
6370: fmovm.x &0x0c,-(%sp) # save fp2/fp3
6371:
6372: fmul.x %fp0,%fp0 # FP0 IS Y = X*X
6373:
6374: fmov.x %fp0,%fp1
6375: fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6376:
6377: fmov.d ATANB6(%pc),%fp2
6378: fmov.d ATANB5(%pc),%fp3
6379:
6380: fmul.x %fp1,%fp2 # Z*B6
6381: fmul.x %fp1,%fp3 # Z*B5
6382:
6383: fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6384: fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6385:
6386: fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6387: fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6388:
6389: fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6390: fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6391:
6392: fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6393: fmul.x X(%a6),%fp0 # X*Y
6394:
6395: fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6396:
6397: fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6398:
6399: fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6400:
6401: fmov.l %d0,%fpcr # restore users rnd mode,prec
6402: fadd.x X(%a6),%fp0
6403: bra t_inx2
6404:
6405: ATANTINY:
6406: #--|X| < 2^(-40), ATAN(X) = X
6407:
6408: fmov.l %d0,%fpcr # restore users rnd mode,prec
6409: mov.b &FMOV_OP,%d1 # last inst is MOVE
6410: fmov.x X(%a6),%fp0 # last inst - possible exception set
6411:
6412: bra t_catch
6413:
6414: ATANBIG:
6415: #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6416: #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6417: cmp.l %d1,&0x40638000
6418: bgt.w ATANHUGE
6419:
6420: #--APPROXIMATE ATAN(-1/X) BY
6421: #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6422: #--THIS CAN BE RE-WRITTEN AS
6423: #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6424:
6425: fmovm.x &0x0c,-(%sp) # save fp2/fp3
6426:
6427: fmov.s &0xBF800000,%fp1 # LOAD -1
6428: fdiv.x %fp0,%fp1 # FP1 IS -1/X
6429:
6430: #--DIVIDE IS STILL CRANKING
6431:
6432: fmov.x %fp1,%fp0 # FP0 IS X'
6433: fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6434: fmov.x %fp1,X(%a6) # X IS REALLY X'
6435:
6436: fmov.x %fp0,%fp1
6437: fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6438:
6439: fmov.d ATANC5(%pc),%fp3
6440: fmov.d ATANC4(%pc),%fp2
6441:
6442: fmul.x %fp1,%fp3 # Z*C5
6443: fmul.x %fp1,%fp2 # Z*C4
6444:
6445: fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6446: fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6447:
6448: fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6449: fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6450:
6451: fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6452: fmul.x X(%a6),%fp0 # X'*Y
6453:
6454: fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6455:
6456: fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)]
6457: # ... +[Y*(C2+Z*C4)])
6458: fadd.x X(%a6),%fp0
6459:
6460: fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6461:
6462: fmov.l %d0,%fpcr # restore users rnd mode,prec
#--The first byte of the operand holds its sign bit, so tst.b/bpl selects
#--the branch by the sign of X.
6463: tst.b (%a0)
6464: bpl.b pos_big
6465:
6466: neg_big:
6467: fadd.x NPIBY2(%pc),%fp0
6468: bra t_minx2
6469:
6470: pos_big:
6471: fadd.x PPIBY2(%pc),%fp0
6472: bra t_pinx2
6473:
6474: ATANHUGE:
6475: #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6476: tst.b (%a0)
6477: bpl.b pos_huge
6478:
#--The constant is loaded before %fpcr is switched, so the add of the
#--opposite-signed tiny value is the only operation performed under the
#--user's rounding mode/precision.
6479: neg_huge:
6480: fmov.x NPIBY2(%pc),%fp0
6481: fmov.l %d0,%fpcr
6482: fadd.x PTINY(%pc),%fp0
6483: bra t_minx2
6484:
6485: pos_huge:
6486: fmov.x PPIBY2(%pc),%fp0
6487: fmov.l %d0,%fpcr
6488: fadd.x NTINY(%pc),%fp0
6489: bra t_pinx2
6490:
#--satand: entry point for a denormalized operand. All work is delegated to
#--t_extdnrm (defined elsewhere in this file).
6491: global satand
6492: #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6493: satand:
6494: bra t_extdnrm
6495:
6496: #########################################################################
6497: # sasin(): computes the inverse sine of a normalized input #
6498: # sasind(): computes the inverse sine of a denormalized input #
6499: # #
6500: # INPUT *************************************************************** #
6501: # a0 = pointer to extended precision input #
6502: # d0 = round precision,mode #
6503: # #
6504: # OUTPUT ************************************************************** #
6505: # fp0 = arcsin(X) #
6506: # #
6507: # ACCURACY and MONOTONICITY ******************************************* #
6508: # The returned result is within 3 ulps in 64 significant bit, #
6509: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6510: # rounded to double precision. The result is provably monotonic #
6511: # in double precision. #
6512: # #
6513: # ALGORITHM *********************************************************** #
6514: # #
6515: # ASIN #
6516: # 1. If |X| >= 1, go to 3. #
6517: # #
6518: # 2. (|X| < 1) Calculate asin(X) by #
6519: # z := sqrt( [1-X][1+X] ) #
6520: # asin(X) = atan( x / z ). #
6521: # Exit. #
6522: # #
6523: # 3. If |X| > 1, go to 5. #
6524: # #
6525: # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6526: # #
6527: # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6528: # Exit. #
6529: # #
6530: #########################################################################
6531:
#--sasin: arcsine of the extended-precision operand at (%a0), result in %fp0.
#--%d0 holds the caller's rounding mode/precision; %d1 is scratch (compact
#--form of |X|). Dispatch: |X| >= 1 -> ASINBIG, |X| < 2^(-40) -> ASINTINY,
#--otherwise ASINMAIN, which calls satan.
6532: global sasin
6533: sasin:
6534: fmov.x (%a0),%fp0 # LOAD INPUT
6535:
6536: mov.l (%a0),%d1
6537: mov.w 4(%a0),%d1
6538: and.l &0x7FFFFFFF,%d1
6539: cmp.l %d1,&0x3FFF8000
6540: bge.b ASINBIG
6541:
6542: # This catch is added here for the '060 QSP. Originally, the call to
6543: # satan() would handle this case by causing the exception which would
6544: # not be caught until gen_except(). Now, with the exceptions being
6545: # detected inside of satan(), the exception would have been handled there
6546: # instead of inside sasin() as expected.
6547: cmp.l %d1,&0x3FD78000
6548: blt.w ASINTINY
6549:
6550: #--THIS IS THE USUAL CASE, |X| < 1
6551: #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6552:
6553: ASINMAIN:
6554: fmov.s &0x3F800000,%fp1
6555: fsub.x %fp0,%fp1 # 1-X
6556: fmovm.x &0x4,-(%sp) # {fp2}
6557: fmov.s &0x3F800000,%fp2
6558: fadd.x %fp0,%fp2 # 1+X
6559: fmul.x %fp2,%fp1 # (1+X)(1-X)
6560: fmovm.x (%sp)+,&0x20 # {fp2}
6561: fsqrt.x %fp1 # SQRT([1-X][1+X])
6562: fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
#--The quotient is passed to satan in memory: a0 is pointed at a 12-byte
#--copy on the stack, which is discarded after the call. d0 is left
#--untouched, so satan sees the caller's rounding mode/precision.
6563: fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6564: lea (%sp),%a0 # pass ptr to X/SQRT(...)
6565: bsr satan
6566: add.l &0xc,%sp # clear X/SQRT(...) from stack
6567: bra t_inx2
6568:
6569: ASINBIG:
6570: fabs.x %fp0 # |X|
6571: fcmp.s %fp0,&0x3F800000
6572: fbgt t_operr # cause an operr exception
6573:
6574: #--|X| = 1, ASIN(X) = +- PI/2.
#--PI/2 is multiplied by a single-precision +1.0 or -1.0 built from the sign
#--bit of X; the multiply is the only operation performed under the user's
#--rounding mode/precision.
6575: ASINONE:
6576: fmov.x PIBY2(%pc),%fp0
6577: mov.l (%a0),%d1
6578: and.l &0x80000000,%d1 # SIGN BIT OF X
6579: or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6580: mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6581: fmov.l %d0,%fpcr
6582: fmul.s (%sp)+,%fp0
6583: bra t_inx2
6584:
6585: #--|X| < 2^(-40), ASIN(X) = X
6586: ASINTINY:
6587: fmov.l %d0,%fpcr # restore users rnd mode,prec
6588: mov.b &FMOV_OP,%d1 # last inst is MOVE
6589: fmov.x (%a0),%fp0 # last inst - possible exception
6590: bra t_catch
6591:
#--sasind: entry point for a denormalized operand. All work is delegated to
#--t_extdnrm (defined elsewhere in this file).
6592: global sasind
6593: #--ASIN(X) = X FOR DENORMALIZED X
6594: sasind:
6595: bra t_extdnrm
6596:
6597: #########################################################################
6598: # sacos(): computes the inverse cosine of a normalized input #
6599: # sacosd(): computes the inverse cosine of a denormalized input #
6600: # #
6601: # INPUT *************************************************************** #
6602: # a0 = pointer to extended precision input #
6603: # d0 = round precision,mode #
6604: # #
6605: # OUTPUT ************************************************************** #
6606: # fp0 = arccos(X) #
6607: # #
6608: # ACCURACY and MONOTONICITY ******************************************* #
6609: # The returned result is within 3 ulps in 64 significant bit, #
6610: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6611: # rounded to double precision. The result is provably monotonic #
6612: # in double precision. #
6613: # #
6614: # ALGORITHM *********************************************************** #
6615: # #
6616: # ACOS #
6617: # 1. If |X| >= 1, go to 3. #
6618: # #
6619: # 2. (|X| < 1) Calculate acos(X) by #
6620: # z := (1-X) / (1+X) #
6621: # acos(X) = 2 * atan( sqrt(z) ). #
6622: # Exit. #
6623: # #
6624: # 3. If |X| > 1, go to 5. #
6625: # #
6626: # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6627: # #
6628: # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6629: # Exit. #
6630: # #
6631: #########################################################################
6632:
#--sacos: arccosine of the extended-precision operand at (%a0), result in
#--%fp0. %d0 holds the caller's rounding mode/precision; %d1 is scratch
#--(compact form of |X|). |X| >= 1 goes to ACOSBIG, everything else to
#--ACOSMAIN, which calls satan.
6633: global sacos
6634: sacos:
6635: fmov.x (%a0),%fp0 # LOAD INPUT
6636:
6637: mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6638: mov.w 4(%a0),%d1
6639: and.l &0x7FFFFFFF,%d1
6640: cmp.l %d1,&0x3FFF8000
6641: bge.b ACOSBIG
6642:
6643: #--THIS IS THE USUAL CASE, |X| < 1
6644: #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6645:
6646: ACOSMAIN:
6647: fmov.s &0x3F800000,%fp1
6648: fadd.x %fp0,%fp1 # 1+X
6649: fneg.x %fp0 # -X
6650: fadd.s &0x3F800000,%fp0 # 1-X
6651: fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6652: fsqrt.x %fp0 # SQRT((1-X)/(1+X))
#--satan writes d0 to %fpcr before its last instruction. The user's value
#--is therefore parked on the stack and d0 is cleared, so the nested call
#--loads %fpcr with zero; the user's setting is applied only to the final
#--doubling below.
6653: mov.l %d0,-(%sp) # save original users fpcr
6654: clr.l %d0
6655: fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6656: lea (%sp),%a0 # pass ptr to sqrt
6657: bsr satan # ATAN(SQRT([1-X]/[1+X]))
6658: add.l &0xc,%sp # clear SQRT(...) from stack
6659:
6660: fmov.l (%sp)+,%fpcr # restore users round prec,mode
6661: fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6662: bra t_pinx2
6663:
6664: ACOSBIG:
6665: fabs.x %fp0
6666: fcmp.s %fp0,&0x3F800000
6667: fbgt t_operr # cause an operr exception
6668:
6669: #--|X| = 1, ACOS(X) = 0 OR PI
6670: tst.b (%a0) # is X positive or negative?
6671: bpl.b ACOSP1
6672:
6673: #--X = -1
6674: #Returns PI and inexact exception
#--0x00800000 is 2**(-126) in single format; adding it to PI under the
#--user's mode is the final, inexact operation of this path.
6675: ACOSM1:
6676: fmov.x PI(%pc),%fp0 # load PI
6677: fmov.l %d0,%fpcr # load round mode,prec
6678: fadd.s &0x00800000,%fp0 # add a small value
6679: bra t_pinx2
6680:
6681: ACOSP1:
6682: bra ld_pzero # answer is positive zero
6683:
#--sacosd: entry point for a denormalized operand. The operand itself is
#--never read; PI/2 is loaded after the user's rounding mode/precision has
#--been installed, so the load is performed under the user's setting.
6684: global sacosd
6685: #--ACOS(X) = PI/2 FOR DENORMALIZED X
6686: sacosd:
6687: fmov.l %d0,%fpcr # load user's rnd mode/prec
6688: fmov.x PIBY2(%pc),%fp0
6689: bra t_pinx2
6690:
6691: #########################################################################
6692: # setox(): computes the exponential for a normalized input #
6693: # setoxd(): computes the exponential for a denormalized input #
6694: # setoxm1(): computes the exponential minus 1 for a normalized input #
6695: # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6696: # #
6697: # INPUT *************************************************************** #
6698: # a0 = pointer to extended precision input #
6699: # d0 = round precision,mode #
6700: # #
6701: # OUTPUT ************************************************************** #
6702: # fp0 = exp(X) or exp(X)-1 #
6703: # #
6704: # ACCURACY and MONOTONICITY ******************************************* #
6705: # The returned result is within 0.85 ulps in 64 significant bit, #
6706: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6707: # rounded to double precision. The result is provably monotonic #
6708: # in double precision. #
6709: # #
6710: # ALGORITHM and IMPLEMENTATION **************************************** #
6711: # #
6712: # setoxd #
6713: # ------ #
6714: # Step 1. Set ans := 1.0 #
6715: # #
6716: # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6717: # Notes: This will always generate one exception -- inexact. #
6718: # #
6719: # #
6720: # setox #
6721: # ----- #
6722: # #
6723: # Step 1. Filter out extreme cases of input argument. #
6724: # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6725: # 1.2 Go to Step 7. #
6726: # 1.3 If |X| < 16380 log(2), go to Step 2. #
6727: # 1.4 Go to Step 8. #
6728: # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6729: # To avoid the use of floating-point comparisons, a #
6730: # compact representation of |X| is used. This format is a #
6731: # 32-bit integer, the upper (more significant) 16 bits #
6732: # are the sign and biased exponent field of |X|; the #
6733: # lower 16 bits are the 16 most significant fraction #
6734: # (including the explicit bit) bits of |X|. Consequently, #
6735: # the comparisons in Steps 1.1 and 1.3 can be performed #
6736: # by integer comparison. Note also that the constant #
6737: # 16380 log(2) used in Step 1.3 is also in the compact #
6738: # form. Thus taking the branch to Step 2 guarantees #
6739: # |X| < 16380 log(2). There is no harm to have a small #
6740: # number of cases where |X| is less than, but close to, #
6741: # 16380 log(2) and the branch to Step 8 is taken. #
6742: # #
6743: # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6744: # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6745: # was taken) #
6746: # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6747: # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6748: # or 63. #
6749: # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6750: # 2.5 Calculate the address of the stored value of #
6751: # 2^(J/64). #
6752: # 2.6 Create the value Scale = 2^M. #
6753: # Notes: The calculation in 2.2 is really performed by #
6754: # Z := X * constant #
6755: # N := round-to-nearest-integer(Z) #
6756: # where #
6757: # constant := single-precision( 64/log 2 ). #
6758: # #
6759: # Using a single-precision constant avoids memory #
6760: # access. Another effect of using a single-precision #
6761: # "constant" is that the calculated value Z is #
6762: # #
6763: # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6764: # #
6765: # This error has to be considered later in Steps 3 and 4. #
6766: # #
6767: # Step 3. Calculate X - N*log2/64. #
6768: # 3.1 R := X + N*L1, #
6769: # where L1 := single-precision(-log2/64). #
6770: # 3.2 R := R + N*L2, #
6771: # L2 := extended-precision(-log2/64 - L1).#
6772: # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6773: # approximate the value -log2/64 to 88 bits of accuracy. #
6774: # b) N*L1 is exact because N is no longer than 22 bits #
6775: # and L1 is no longer than 24 bits. #
6776: # c) The calculation X+N*L1 is also exact due to #
6777: # cancellation. Thus, R is practically X+N(L1+L2) to full #
6778: # 64 bits. #
6779: # d) It is important to estimate how large can |R| be #
6780: # after Step 3.2. #
6781: # #
6782: # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6783: # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6784: # X*64/log2 - N = f - eps*X 64/log2 #
6785: # X - N*log2/64 = f*log2/64 - eps*X #
6786: # #
6787: # #
6788: # Now |X| <= 16446 log2, thus #
6789: # #
6790: # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6791: # <= 0.57 log2/64. #
6792: # This bound will be used in Step 4. #
6793: # #
6794: # Step 4. Approximate exp(R)-1 by a polynomial #
6795: # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6796: # Notes: a) In order to reduce memory access, the coefficients #
6797: # are made as "short" as possible: A1 (which is 1/2), A4 #
6798: # and A5 are single precision; A2 and A3 are double #
6799: # precision. #
6800: # b) Even with the restrictions above, #
6801: # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6802: # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6803: # c) To fully utilize the pipeline, p is separated into #
6804: # two independent pieces of roughly equal complexities #
6805: # p = [ R + R*S*(A2 + S*A4) ] + #
6806: # [ S*(A1 + S*(A3 + S*A5)) ] #
6807: # where S = R*R. #
6808: # #
6809: # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6810: # ans := T + ( T*p + t) #
6811: # where T and t are the stored values for 2^(J/64). #
6812: # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6813: # 2^(J/64) to roughly 85 bits; T is in extended precision #
6814: # and t is in single precision. Note also that T is #
6815: # rounded to 62 bits so that the last two bits of T are #
6816: # zero. The reason for such a special form is that T-1, #
6817: # T-2, and T-8 will all be exact --- a property that will #
6818: # give much more accurate computation of the function #
6819: # EXPM1. #
6820: # #
6821: # Step 6. Reconstruction of exp(X) #
6822: # exp(X) = 2^M * 2^(J/64) * exp(R). #
6823: # 6.1 If AdjFlag = 0, go to 6.3 #
6824: # 6.2 ans := ans * AdjScale #
6825: # 6.3 Restore the user FPCR #
6826: # 6.4 Return ans := ans * Scale. Exit. #
6827: # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6828: # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6829: # neither overflow nor underflow. If AdjFlag = 1, that #
6830: # means that #
6831: # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6832: # Hence, exp(X) may overflow or underflow or neither. #
6833: # When that is the case, AdjScale = 2^(M1) where M1 is #
6834: # approximately M. Thus 6.2 will never cause #
6835: # over/underflow. Possible exception in 6.4 is overflow #
6836: # or underflow. The inexact exception is not generated in #
6837: # 6.4. Although one can argue that the inexact flag #
6838: # should always be raised, simulating that exception #
6839: # costs more than the flag is worth in practical use. #
6840: # #
6841: # Step 7. Return 1 + X. #
6842: # 7.1 ans := X #
6843: # 7.2 Restore user FPCR. #
6844: # 7.3 Return ans := 1 + ans. Exit #
6845: # Notes: For non-zero X, the inexact exception will always be #
6846: # raised by 7.3. That is the only exception raised by 7.3.#
6847: # Note also that we use the FMOVEM instruction to move X #
6848: # in Step 7.1 to avoid unnecessary trapping. (Although #
6849: # the FMOVEM may not seem relevant since X is normalized, #
6850: # the precaution will be useful in the library version of #
6851: # this code where the separate entry for denormalized #
6852: # inputs will be done away with.) #
6853: # #
6854: # Step 8. Handle exp(X) where |X| >= 16380log2. #
6855: # 8.1 If |X| > 16480 log2, go to Step 9. #
6856: # (mimic 2.2 - 2.6) #
6857: # 8.2 N := round-to-integer( X * 64/log2 ) #
6858: # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6859: # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6860: # AdjFlag := 1. #
6861: # 8.5 Calculate the address of the stored value #
6862: # 2^(J/64). #
6863: # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6864: # 8.7 Go to Step 3. #
6865: # Notes: Refer to notes for 2.2 - 2.6. #
6866: # #
6867: # Step 9. Handle exp(X), |X| > 16480 log2. #
6868: # 9.1 If X < 0, go to 9.3 #
6869: # 9.2 ans := Huge, go to 9.4 #
6870: # 9.3 ans := Tiny. #
6871: # 9.4 Restore user FPCR. #
6872: # 9.5 Return ans := ans * ans. Exit. #
6873: # Notes: Exp(X) will surely overflow or underflow, depending on #
6874: # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6875: # extended-precision numbers whose square over/underflow #
6876: # with an inexact result. Thus, 9.5 always raises the #
6877: # inexact together with either overflow or underflow. #
6878: # #
6879: # setoxm1d #
6880: # -------- #
6881: # #
6882: # Step 1. Set ans := 0 #
6883: # #
6884: # Step 2. Return ans := X + ans. Exit. #
6885: # Notes: This will return X with the appropriate rounding #
6886: # precision prescribed by the user FPCR. #
6887: # #
6888: # setoxm1 #
6889: # ------- #
6890: # #
6891: # Step 1. Check |X| #
6892: # 1.1 If |X| >= 1/4, go to Step 1.3. #
6893: # 1.2 Go to Step 7. #
6894: # 1.3 If |X| < 70 log(2), go to Step 2. #
6895: # 1.4 Go to Step 10. #
6896: # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6897: # However, it is conceivable |X| can be small very often #
6898: # because EXPM1 is intended to evaluate exp(X)-1 #
6899: # accurately when |X| is small. For further details on #
6900: # the comparisons, see the notes on Step 1 of setox. #
6901: # #
6902: # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6903: # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
6904: # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
6905: # or 63. #
6906: # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
6907: # 2.4 Calculate the address of the stored value of #
6908: # 2^(J/64). #
6909: # 2.5 Create the values Sc = 2^M and #
6910: # OnebySc := -2^(-M). #
6911: # Notes: See the notes on Step 2 of setox. #
6912: # #
6913: # Step 3. Calculate X - N*log2/64. #
6914: # 3.1 R := X + N*L1, #
6915: # where L1 := single-precision(-log2/64). #
6916: # 3.2 R := R + N*L2, #
6917: # L2 := extended-precision(-log2/64 - L1).#
6918: # Notes: Applying the analysis of Step 3 of setox in this case #
6919: # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
6920: # this case). #
6921: # #
6922: # Step 4. Approximate exp(R)-1 by a polynomial #
6923: # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
6924: # Notes: a) In order to reduce memory access, the coefficients #
6925: # are made as "short" as possible: A1 (which is 1/2), A5 #
6926: # and A6 are single precision; A2, A3 and A4 are double #
6927: # precision. #
6928: # b) Even with the restriction above, #
6929: # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
6930: # for all |R| <= 0.0055. #
6931: # c) To fully utilize the pipeline, p is separated into #
6932: # two independent pieces of roughly equal complexity #
6933: # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
6934: # [ R + S*(A1 + S*(A3 + S*A5)) ] #
6935: # where S = R*R. #
6936: # #
6937: # Step 5. Compute 2^(J/64)*p by #
6938: # p := T*p #
6939: # where T and t are the stored values for 2^(J/64). #
6940: # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6941: # 2^(J/64) to roughly 85 bits; T is in extended precision #
6942: # and t is in single precision. Note also that T is #
6943: # rounded to 62 bits so that the last two bits of T are #
6944: # zero. The reason for such a special form is that T-1, #
6945: # T-2, and T-8 will all be exact --- a property that will #
6946: # be exploited in Step 6 below. The total relative error #
6947: # in p is no bigger than 2^(-67.7) compared to the final #
6948: # result. #
6949: # #
6950: # Step 6. Reconstruction of exp(X)-1 #
6951: # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
6952: # 6.1 If M <= 63, go to Step 6.3. #
6953: # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
6954: # 6.3 If M >= -3, go to 6.5. #
6955: # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
6956: # 6.5 ans := (T + OnebySc) + (p + t). #
6957: # 6.6 Restore user FPCR. #
6958: # 6.7 Return ans := Sc * ans. Exit. #
6959: # Notes: The various arrangements of the expressions give #
6960: # accurate evaluations. #
6961: # #
6962: # Step 7. exp(X)-1 for |X| < 1/4. #
6963: # 7.1 If |X| >= 2^(-65), go to Step 9. #
6964: # 7.2 Go to Step 8. #
6965: # #
6966: # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
6967: # 8.1 If |X| < 2^(-16312), goto 8.3 #
6968: # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
6969: # Exit. #
6970: # 8.3 X := X * 2^(140). #
6971: # 8.4 ans := X - 2^(-16382); Restore FPCR. #
6972: # Return ans := ans*2^(-140). Exit #
6973: # Notes: The idea is to return "X - tiny" under the user #
6974: # precision and rounding modes. To avoid unnecessary #
6975: # inefficiency, we stay away from denormalized numbers #
6976: # the best we can. For |X| >= 2^(-16312), the #
6977: # straightforward 8.2 generates the inexact exception as #
6978: # the case warrants. #
6979: # #
6980: # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
6981: # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
6982: # Notes: a) In order to reduce memory access, the coefficients #
6983: # are made as "short" as possible: B1 (which is 1/2), B9 #
6984: # to B12 are single precision; B3 to B8 are double #
6985: # precision; and B2 is double extended. #
6986: # b) Even with the restriction above, #
6987: # |p - (exp(X)-1)| < |X| 2^(-70.6) #
6988: # for all |X| <= 0.251. #
6989: # Note that 0.251 is slightly bigger than 1/4. #
6990: # c) To fully preserve accuracy, the polynomial is #
6991: # computed as #
6992: # X + ( S*B1 + Q ) where S = X*X and #
6993: # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
6994: # d) To fully utilize the pipeline, Q is separated into #
6995: # two independent pieces of roughly equal complexity #
6996: # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
6997: # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
6998: # #
6999: # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7000: # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7001: # practical purposes. Therefore, go to Step 1 of setox. #
7002: # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7003: # purposes. #
7004: # ans := -1 #
7005: # Restore user FPCR #
7006: # Return ans := ans + 2^(-126). Exit. #
7007: # Notes: 10.2 will always create an inexact and return -1 + tiny #
7008: # in the user rounding precision and mode. #
7009: # #
7010: #########################################################################
7011:
7012: L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # L2 = extended-precision(-log2/64 - L1), used as N*L2 in Step 3
7013:
7014: EEXPA3: long 0x3FA55555,0x55554CC1 # setox A3 (double), added with fadd.d in Step 4
7015: EEXPA2: long 0x3FC55555,0x55554A54 # setox A2 (double), added with fadd.d in Step 4
7016:
7017: EM1A4: long 0x3F811111,0x11174385 # setoxm1 A4 (double)
7018: EM1A3: long 0x3FA55555,0x55554F5A # setoxm1 A3 (double)
7019:
7020: EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # setoxm1 A2, read with fadd.d so only the first two longs are used
7021:
7022: EM1B8: long 0x3EC71DE3,0xA5774682 # EM1POLY B8 (double)
7023: EM1B7: long 0x3EFA01A0,0x19D7CB68 # EM1POLY B7 (double)
7024:
7025: EM1B6: long 0x3F2A01A0,0x1A019DF3 # EM1POLY B6 (double)
7026: EM1B5: long 0x3F56C16C,0x16C170E2 # EM1POLY B5 (double)
7027:
7028: EM1B4: long 0x3F811111,0x11111111 # EM1POLY B4 (double)
7029: EM1B3: long 0x3FA55555,0x55555555 # EM1POLY B3 (double)
7030:
7031: EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # EM1POLY B2 (extended, read with fadd.x)
7032: long 0x00000000 # fourth long of the EM1B2 slot, not part of the 12-byte extended value
7033:
7034: TWO140: long 0x48B00000,0x00000000 # double 2^(140), scale-up factor in EM12TINY
7035: TWON140: # double 2^(-140), scale-down factor in EM12TINY
7036: long 0x37300000,0x00000000
7037:
7038: EEXPTBL: # 64 entries for 2^(J/64), J = 0..63, 16 bytes each: T (extended, 3 longs) followed by t (single, 1 long)
7039: long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7040: long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7041: long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7042: long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7043: long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7044: long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7045: long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7046: long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7047: long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7048: long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7049: long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7050: long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7051: long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7052: long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7053: long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7054: long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7055: long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7056: long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7057: long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7058: long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7059: long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7060: long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7061: long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7062: long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7063: long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7064: long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7065: long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7066: long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7067: long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7068: long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7069: long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7070: long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7071: long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7072: long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7073: long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7074: long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7075: long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7076: long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7077: long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7078: long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7079: long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7080: long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7081: long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7082: long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7083: long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7084: long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7085: long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7086: long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7087: long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7088: long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7089: long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7090: long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7091: long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7092: long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7093: long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7094: long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7095: long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7096: long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7097: long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7098: long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7099: long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7100: long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7101: long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7102: long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7103:
7104: set ADJFLAG,L_SCR2 # setox: 0 on the EXPMAIN path, 1 on the EEXPBIG path
7105: set SCALE,FP_SCR0 # setox: 2^(M) built in extended format
7106: set ADJSCALE,FP_SCR1 # setox: 2^(M1), only built on the EEXPBIG path
7107: set SC,FP_SCR0 # setoxm1: 2^(M), same scratch slot as SCALE
7108: set ONEBYSC,FP_SCR1 # setoxm1: -2^(-M), same scratch slot as ADJSCALE
7109:
7110: global setox
7111: setox:
7112: #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7113: #--a0 = pointer to extended X, d0 = user's round precision,mode. d1 and a1 are used as scratch.
7114: #--Step 1.
7115: mov.l (%a0),%d1 # load part of input X
7116: and.l &0x7FFF0000,%d1 # biased expo. of X
7117: cmp.l %d1,&0x3FBE0000 # 2^(-65)
7118: bge.b EXPC1 # normal case
7119: bra EXPSM # |X| < 2^(-65), Step 7
7120:
7121: EXPC1:
7122: #--The case |X| >= 2^(-65). Also entered from EM1BIG (setoxm1) with d1 = first longword of a positive X.
7123: mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7124: cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7125: blt.b EXPMAIN # normal case
7126: bra EEXPBIG # |X| >= 16380 log2, Step 8
7127:
7128: EXPMAIN:
7129: #--Step 2.
7130: #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7131: fmov.x (%a0),%fp0 # load input from (a0)
7132:
7133: fmov.x %fp0,%fp1 # fp1 keeps X
7134: fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7135: fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7136: mov.l &0,ADJFLAG(%a6) # AdjFlag := 0 (Step 2.1)
7137: fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7138: lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7139: fmov.l %d1,%fp0 # convert to floating-format
7140:
7141: mov.l %d1,L_SCR1(%a6) # save N temporarily
7142: and.l &0x3F,%d1 # d1 is J = N mod 64
7143: lsl.l &4,%d1 # J * 16 = byte offset of the table entry
7144: add.l %d1,%a1 # address of 2^(J/64)
7145: mov.l L_SCR1(%a6),%d1 # reload N
7146: asr.l &6,%d1 # d1 is M
7147: add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7148: mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7149:
7150: EXPCONT1:
7151: #--Step 3.
7152: #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7153: #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7154: fmov.x %fp0,%fp2 # fp2 is a copy of N
7155: fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7156: fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7157: fadd.x %fp1,%fp0 # X + N*L1
7158: fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7159:
7160: #--Step 4.
7161: #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7162: #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7163: #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7164: #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7165:
7166: fmov.x %fp0,%fp1 # fp1 is a copy of R
7167: fmul.x %fp1,%fp1 # fp1 IS S = R*R
7168:
7169: fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7170:
7171: fmul.x %fp1,%fp2 # fp2 IS S*A5
7172: fmov.x %fp1,%fp3 # fp3 is a copy of S
7173: fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7174:
7175: fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7176: fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7177:
7178: fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7179: mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7180: mov.l &0x80000000,SCALE+4(%a6) # significand of SCALE: explicit integer bit only
7181: clr.l SCALE+8(%a6) # low significand longword of SCALE is zero
7182:
7183: fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7184:
7185: fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7186: fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7187:
7188: fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7189: fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7190:
7191: fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64), a1 now points at the single-precision tail t
7192: fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7193:
7194: #--Step 5
7195: #--final reconstruction process
7196: #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7197:
7198: fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7199: fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7200: fadd.s (%a1),%fp0 # accurate 2^(J/64)
7201:
7202: fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7203: mov.l ADJFLAG(%a6),%d1 # d1 := AdjFlag
7204:
7205: #--Step 6
7206: tst.l %d1 # AdjFlag = 0 means no AdjScale multiply (Step 6.1)
7207: beq.b NORMAL
7208: ADJUST:
7209: fmul.x ADJSCALE(%a6),%fp0 # ans := ans * AdjScale (Step 6.2)
7210: NORMAL:
7211: fmov.l %d0,%fpcr # restore user FPCR
7212: mov.b &FMUL_OP,%d1 # last inst is MUL
7213: fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7214: bra t_catch # exit through t_catch (defined outside this section)
7215:
7216: EXPSM:
7217: #--Step 7
7218: fmovm.x (%a0),&0x80 # load X
7219: fmov.l %d0,%fpcr # restore user FPCR
7220: fadd.s &0x3F800000,%fp0 # 1+X in user mode
7221: bra t_pinx2 # exit through t_pinx2 (defined outside this section)
7222:
7223: EEXPBIG:
7224: #--Step 8
7225: cmp.l %d1,&0x400CB27C # 16480 log2
7226: bgt.b EXP2BIG # |X| > 16480 log2, Step 9
7227: #--Steps 8.2 -- 8.6
7228: fmov.x (%a0),%fp0 # load input from (a0)
7229:
7230: fmov.x %fp0,%fp1 # fp1 keeps X
7231: fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7232: fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7233: mov.l &1,ADJFLAG(%a6) # AdjFlag := 1 (Step 8.4)
7234: fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7235: lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7236: fmov.l %d1,%fp0 # convert to floating-format
7237: mov.l %d1,L_SCR1(%a6) # save N temporarily
7238: and.l &0x3F,%d1 # d1 is J = N mod 64
7239: lsl.l &4,%d1 # J * 16 = byte offset of the table entry
7240: add.l %d1,%a1 # address of 2^(J/64)
7241: mov.l L_SCR1(%a6),%d1 # reload N
7242: asr.l &6,%d1 # d1 is K
7243: mov.l %d1,L_SCR1(%a6) # save K temporarily
7244: asr.l &1,%d1 # d1 is M1
7245: sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7246: add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7247: mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7248: mov.l &0x80000000,ADJSCALE+4(%a6) # significand of ADJSCALE: explicit integer bit only
7249: clr.l ADJSCALE+8(%a6) # low significand longword of ADJSCALE is zero
7250: mov.l L_SCR1(%a6),%d1 # d1 is M
7251: add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7252: bra.w EXPCONT1 # go back to Step 3
7253:
7254: EXP2BIG:
7255: #--Step 9
7256: tst.b (%a0) # is X positive or negative?
7257: bmi t_unfl2 # X negative: exit through t_unfl2 (defined outside this section)
7258: bra t_ovfl2 # X positive: exit through t_ovfl2 (defined outside this section)
7260: global setoxd
7261: setoxd:
7262: #--entry point for EXP(X), X is denormalized
7263: mov.l (%a0),-(%sp)
7264: andi.l &0x80000000,(%sp)
7265: ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7266:
7267: fmov.s &0x3F800000,%fp0
7268:
7269: fmov.l %d0,%fpcr
7270: fadd.s (%sp)+,%fp0
7271: bra t_pinx2
7272:
7273: global setoxm1
7274: setoxm1:
7275: #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7276: #--a0 = pointer to extended X, d0 = user's round precision,mode. d1 and a1 are used as scratch.
7277: #--Step 1.
7278: #--Step 1.1
7279: mov.l (%a0),%d1 # load part of input X
7280: and.l &0x7FFF0000,%d1 # biased expo. of X
7281: cmp.l %d1,&0x3FFD0000 # 1/4
7282: bge.b EM1CON1 # |X| >= 1/4
7283: bra EM1SM # |X| < 1/4, Step 7
7284:
7285: EM1CON1:
7286: #--Step 1.3
7287: #--The case |X| >= 1/4
7288: mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7289: cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7290: ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7291: bra EM1BIG # |X| > 70 log2, Step 10
7292:
7293: EM1MAIN:
7294: #--Step 2.
7295: #--This is the case: 1/4 <= |X| <= 70 log2.
7296: fmov.x (%a0),%fp0 # load input from (a0)
7297:
7298: fmov.x %fp0,%fp1 # fp1 keeps X
7299: fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7300: fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7301: fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7302: lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7303: fmov.l %d1,%fp0 # convert to floating-format
7304:
7305: mov.l %d1,L_SCR1(%a6) # save N temporarily
7306: and.l &0x3F,%d1 # d1 is J = N mod 64
7307: lsl.l &4,%d1 # J * 16 = byte offset of the table entry
7308: add.l %d1,%a1 # address of 2^(J/64)
7309: mov.l L_SCR1(%a6),%d1 # reload N
7310: asr.l &6,%d1 # d1 is M
7311: mov.l %d1,L_SCR1(%a6) # save a copy of M
7312:
7313: #--Step 3.
7314: #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7315: #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7316: fmov.x %fp0,%fp2 # fp2 is a copy of N
7317: fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7318: fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7319: fadd.x %fp1,%fp0 # X + N*L1
7320: fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7321: add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
7322:
7323: #--Step 4.
7324: #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7325: #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7326: #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7327: #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7328:
7329: fmov.x %fp0,%fp1 # fp1 is a copy of R
7330: fmul.x %fp1,%fp1 # fp1 IS S = R*R
7331:
7332: fmov.s &0x3950097B,%fp2 # fp2 IS A6
7333:
7334: fmul.x %fp1,%fp2 # fp2 IS S*A6
7335: fmov.x %fp1,%fp3 # fp3 is a copy of S
7336: fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7337:
7338: fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7339: fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7340: mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7341: mov.l &0x80000000,SC+4(%a6) # significand of SC: explicit integer bit only
7342: clr.l SC+8(%a6) # low significand longword of SC is zero
7343:
7344: fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7345: mov.l L_SCR1(%a6),%d1 # d1 is M
7346: neg.w %d1 # d1 is -M
7347: fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7348: add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7349: fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7350: fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7351:
7352: fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7353: or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7354: mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7355: mov.l &0x80000000,ONEBYSC+4(%a6) # significand of OnebySc: explicit integer bit only
7356: clr.l ONEBYSC+8(%a6) # low significand longword of OnebySc is zero
7357: fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7358:
7359: fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7360: fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7361:
7362: fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7363:
7364: fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7365:
7366: #--Step 5
7367: #--Compute 2^(J/64)*p
7368:
7369: fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7370:
7371: #--Step 6
7372: #--Step 6.1
7373: mov.l L_SCR1(%a6),%d1 # retrieve M
7374: cmp.l %d1,&63
7375: ble.b MLE63
7376: #--Step 6.2 M >= 64
7377: fmov.s 12(%a1),%fp1 # fp1 is t (single stored at offset 12 of the table entry)
7378: fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7379: fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7380: fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7381: bra EM1SCALE
7382: MLE63:
7383: #--Step 6.3 M <= 63
7384: cmp.l %d1,&-3
7385: bge.b MGEN3
7386: MLTN3:
7387: #--Step 6.4 M <= -4
7388: fadd.s 12(%a1),%fp0 # p+t
7389: fadd.x (%a1),%fp0 # T+(p+t)
7390: fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7391: bra EM1SCALE
7392: MGEN3:
7393: #--Step 6.5 -3 <= M <= 63
7394: fmov.x (%a1)+,%fp1 # fp1 is T, a1 now points at t
7395: fadd.s (%a1),%fp0 # fp0 is p+t
7396: fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7397: fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7398:
7399: EM1SCALE:
7400: #--Step 6.6
7401: fmov.l %d0,%fpcr # restore user FPCR
7402: fmul.x SC(%a6),%fp0 # ans := Sc * ans (Step 6.7)
7403: bra t_inx2 # exit through t_inx2 (defined outside this section)
7404:
7405: EM1SM:
7406: #--Step 7 |X| < 1/4. d1 still holds the masked biased exponent from Step 1.1.
7407: cmp.l %d1,&0x3FBE0000 # 2^(-65)
7408: bge.b EM1POLY # 2^(-65) <= |X| < 1/4, Step 9
7409:
7410: EM1TINY:
7411: #--Step 8 |X| < 2^(-65)
7412: cmp.l %d1,&0x00330000 # 2^(-16312)
7413: blt.b EM12TINY # |X| < 2^(-16312), Step 8.3
7414: #--Step 8.2
7415: mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7416: mov.l &0x80000000,SC+4(%a6) # significand of SC: explicit integer bit only
7417: clr.l SC+8(%a6) # low significand longword of SC is zero
7418: fmov.x (%a0),%fp0 # fp0 is X
7419: fmov.l %d0,%fpcr # restore user FPCR
7420: mov.b &FADD_OP,%d1 # last inst is ADD
7421: fadd.x SC(%a6),%fp0 # ans := X - 2^(-16382)
7422: bra t_catch # exit through t_catch (defined outside this section)
7423:
7424: EM12TINY:
7425: #--Step 8.3
7426: fmov.x (%a0),%fp0 # fp0 is X
7427: fmul.d TWO140(%pc),%fp0 # fp0 is X * 2^(140)
7428: mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7429: mov.l &0x80000000,SC+4(%a6) # significand of SC: explicit integer bit only
7430: clr.l SC+8(%a6) # low significand longword of SC is zero
7431: fadd.x SC(%a6),%fp0 # X*2^(140) - 2^(-16382), done before the user FPCR is restored
7432: fmov.l %d0,%fpcr # restore user FPCR
7433: mov.b &FMUL_OP,%d1 # last inst is MUL
7434: fmul.d TWON140(%pc),%fp0 # scale back by 2^(-140) in user mode
7435: bra t_catch # exit through t_catch (defined outside this section)
7436:
7437: EM1POLY:
7438: #--Step 9 exp(X)-1 by a simple polynomial
7439: fmov.x (%a0),%fp0 # fp0 is X
7440: fmul.x %fp0,%fp0 # fp0 is S := X*X
7441: fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7442: fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7443: fmul.x %fp0,%fp1 # fp1 is S*B12
7444: fmov.s &0x310F8290,%fp2 # fp2 is B11
7445: fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7446:
7447: fmul.x %fp0,%fp2 # fp2 is S*B11
7448: fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7449:
7450: fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7451: fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7452:
7453: fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7454: fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7455:
7456: fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7457: fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7458:
7459: fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7460: fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7461:
7462: fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7463: fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7464:
7465: fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7466: fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7467:
7468: fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7469: fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7470:
7471: fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7472: fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7473:
7474: fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7475: fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7476:
7477: fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7478: fadd.x %fp2,%fp1 # fp1 is Q
7479:
7480: fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7481:
7482: fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7483:
7484: fmov.l %d0,%fpcr # restore user FPCR
7485: fadd.x (%a0),%fp0 # ans := X + (S*B1+Q) in user mode
7486: bra t_inx2 # exit through t_inx2 (defined outside this section)
7487:
7488: EM1BIG:
7489: #--Step 10 |X| > 70 log2
7490: mov.l (%a0),%d1 # first longword of X, sign bit included
7491: cmp.l %d1,&0
7492: bgt.w EXPC1 # X positive: compute exp(X) instead, rejoining setox at EXPC1 (Step 10.1)
7493: #--Step 10.2
7494: fmov.s &0xBF800000,%fp0 # fp0 is -1
7495: fmov.l %d0,%fpcr # restore user FPCR
7496: fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7497: bra t_minx2 # exit through t_minx2 (defined outside this section)
7498:
7499: global setoxm1d
7500: setoxm1d:
7501: #--entry point for EXPM1(X), here X is denormalized
7502: #--Step 0. No arithmetic is done here, the input is handed straight to t_extdnrm.
7503: bra t_extdnrm # t_extdnrm is defined outside this section
7504:
7505: #########################################################################
7506: # sgetexp(): returns the exponent portion of the input argument. #
7507: # The exponent bias is removed and the exponent value is #
7508: # returned as an extended precision number in fp0. #
7509: # sgetexpd(): handles denormalized numbers. #
7510: # #
7511: # sgetman(): extracts the mantissa of the input argument. The #
7512: # mantissa is converted to an extended precision number w/ #
7513: # an exponent of $3fff and is returned in fp0. The range of #
7514: # the result is [1.0 - 2.0). #
7515: # sgetmand(): handles denormalized numbers. #
7516: # #
7517: # INPUT *************************************************************** #
7518: # a0 = pointer to extended precision input #
7519: # #
7520: # OUTPUT ************************************************************** #
7521: # fp0 = exponent(X) or mantissa(X) #
7522: # #
7523: #########################################################################
7524:
7525: global sgetexp
7526: sgetexp:
7527: mov.w SRC_EX(%a0),%d0 # get the exponent
7528: bclr &0xf,%d0 # clear the sign bit
7529: subi.w &0x3fff,%d0 # subtract off the bias
7530: fmov.w %d0,%fp0 # return exp in fp0
7531: blt.b sgetexpn # it's negative (tests the integer condition codes left by subi.w)
7532: rts
7533:
7534: sgetexpn:
7535: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7536: rts
7537:
7538: global sgetexpd
7539: sgetexpd:
7540: bsr.l norm # normalize (norm is defined elsewhere, presumably returns the shift amount in d0)
7541: neg.w %d0 # new exp = -(shft amt)
7542: subi.w &0x3fff,%d0 # subtract off the bias
7543: fmov.w %d0,%fp0 # return exp in fp0
7544: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit (done unconditionally on this path)
7545: rts
7546:
7547: global sgetman
7548: sgetman:
7549: mov.w SRC_EX(%a0),%d0 # get the exp
7550: ori.w &0x7fff,%d0 # set all 15 exponent bits, sign bit kept
7551: bclr &0xe,%d0 # clear bit 14 so the exponent field becomes $3fff
7552:
7553: # here, we build the result in a tmp location so as not to disturb the input
7554: mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7555: mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7556: mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7557: fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7558: bmi.b sgetmann # it's negative (tests the integer condition codes left by the mov.w above)
7559: rts
7560:
7561: sgetmann:
7562: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7563: rts
7564:
7565: #
7566: # For denormalized numbers, shift the mantissa until the j-bit = 1,
7567: # then load the exponent with +/- $3fff.
7568: #
7569: global sgetmand
7570: sgetmand:
7571: bsr.l norm # normalize the mantissa (norm is defined elsewhere; presumably works in place on the operand at (a0) -- confirm)
7572: bra.b sgetman # finish through the normalized-input path, which re-reads the operand at (a0)
7573:
7574: #########################################################################
7575: # scosh(): computes the hyperbolic cosine of a normalized input #
7576: # scoshd(): computes the hyperbolic cosine of a denormalized input #
7577: # #
7578: # INPUT *************************************************************** #
7579: # a0 = pointer to extended precision input #
7580: # d0 = round precision,mode #
7581: # #
7582: # OUTPUT ************************************************************** #
7583: # fp0 = cosh(X) #
7584: # #
7585: # ACCURACY and MONOTONICITY ******************************************* #
7586: # The returned result is within 3 ulps in 64 significant bit, #
7587: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7588: # rounded to double precision. The result is provably monotonic #
7589: # in double precision. #
7590: # #
7591: # ALGORITHM *********************************************************** #
7592: # #
7593: # COSH #
7594: # 1. If |X| > 16380 log2, go to 3. #
7595: # #
7596: # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7597: # y = |X|, z = exp(Y), and #
7598: # cosh(X) = (1/2)*( z + 1/z ). #
7599: # Exit. #
7600: # #
7601: # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7602: # #
7603: # 4. (16380 log2 < |X| <= 16480 log2) #
7604: # cosh(X) = exp(|X|)/2. #
7605: # However, invoking exp(|X|) may cause premature #
7606: # overflow. Thus, we calculate cosh(X) as follows: #
7607: # Y := |X| #
7608: # Fact := 2**(16380) #
7609: # Y' := Y - 16381 log2 #
7610: # cosh(X) := Fact * exp(Y'). #
7611: # Exit. #
7612: # #
7613: # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7614: # Huge*Huge to generate overflow and an infinity with #
7615: # the appropriate sign. Huge is the largest finite number #
7616: # in extended format. Exit. #
7617: # #
7618: #########################################################################
7619:
7620: TWO16380: # 2**16380 in extended precision; multiplied into the COSHBIG result
7621: long 0x7FFB0000,0x80000000,0x00000000,0x00000000 # biased exponent $7FFB = $3FFF+16380, mantissa 1.0; 4th longword is not part of the 12-byte value
7622:
7623: global scosh
7624: scosh: # fp0 = cosh(X) for the normalized operand at (a0); d0 = caller's round precision,mode
7625: fmov.x (%a0),%fp0 # LOAD INPUT
7626:
7627: mov.l (%a0),%d1 # d1 upper half = sign/exponent word
7628: mov.w 4(%a0),%d1 # d1 lower half = top 16 mantissa bits (compact form of X)
7629: and.l &0x7FFFFFFF,%d1 # drop the sign: compact form of |X|
7630: cmp.l %d1,&0x400CB167 # threshold for |X| > 16380 log2 (step 1 of the algorithm)
7631: bgt.b COSHBIG
7632:
7633: #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7634: #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7635:
7636: fabs.x %fp0 # |X|
7637:
7638: mov.l %d0,-(%sp) # save caller's round precision,mode across the call
7639: clr.l %d0 # call setox with d0 = 0
7640: fmovm.x &0x01,-(%sp) # save |X| to stack
7641: lea (%sp),%a0 # pass ptr to |X|
7642: bsr setox # FP0 IS EXP(|X|)
7643: add.l &0xc,%sp # erase |X| from stack
7644: fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7645: mov.l (%sp)+,%d0 # recover caller's round precision,mode
7646:
7647: fmov.s &0x3E800000,%fp1 # (1/4)
7648: fdiv.x %fp0,%fp1 # (1/4) / ((1/2)EXP(|X|)) = 1/(2 EXP(|X|))
7649:
7650: fmov.l %d0,%fpcr # user's rounding applies only to the final add
7651: mov.b &FADD_OP,%d1 # last inst is ADD (tag handed to t_catch in d1)
7652: fadd.x %fp1,%fp0 # (1/2)EXP(|X|) + 1/(2 EXP(|X|))
7653: bra t_catch # exit through t_catch (defined elsewhere)
7654:
7655: COSHBIG:
7656: cmp.l %d1,&0x400CB2B3 # threshold for |X| > 16480 log2 (step 3 of the algorithm)
7657: bgt.b COSHHUGE
7658:
7659: fabs.x %fp0
7660: fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7661: fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7662:
7663: mov.l %d0,-(%sp) # save caller's round precision,mode across the call
7664: clr.l %d0 # call setox with d0 = 0
7665: fmovm.x &0x01,-(%sp) # save fp0 to stack
7666: lea (%sp),%a0 # pass ptr to fp0
7667: bsr setox # fp0 = exp(|X| - 16381 log2)
7668: add.l &0xc,%sp # clear fp0 from stack
7669: mov.l (%sp)+,%d0 # recover caller's round precision,mode
7670:
7671: fmov.l %d0,%fpcr # user's rounding applies only to the final multiply
7672: mov.b &FMUL_OP,%d1 # last inst is MUL (tag handed to t_catch in d1)
7673: fmul.x TWO16380(%pc),%fp0 # scale by 2**16380
7674: bra t_catch # exit through t_catch (defined elsewhere)
7675:
7676: COSHHUGE:
7677: bra t_ovfl2 # |X| > 16480 log2: leave through the overflow exit (defined elsewhere)
7678:
7679: global scoshd
7680: #--COSH(X) = 1 FOR DENORMALIZED X
7681: scoshd:
7682: fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (single-precision immediate)
7683:
7684: fmov.l %d0,%fpcr # install caller's round precision,mode for the add below
7685: fadd.s &0x00800000,%fp0 # add 2**(-126), the smallest normalized single, so the result is rounded
7686: bra t_pinx2 # exit through t_pinx2 (defined elsewhere; presumably the positive/inexact exit -- confirm)
7687:
7688: #########################################################################
7689: # ssinh(): computes the hyperbolic sine of a normalized input #
7690: # ssinhd(): computes the hyperbolic sine of a denormalized input #
7691: # #
7692: # INPUT *************************************************************** #
7693: # a0 = pointer to extended precision input #
7694: # d0 = round precision,mode #
7695: # #
7696: # OUTPUT ************************************************************** #
7697: # fp0 = sinh(X) #
7698: # #
7699: # ACCURACY and MONOTONICITY ******************************************* #
7700: # The returned result is within 3 ulps in 64 significant bit, #
7701: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7702: # rounded to double precision. The result is provably monotonic #
7703: # in double precision. #
7704: # #
7705: # ALGORITHM *********************************************************** #
7706: # #
7707: # SINH #
7708: # 1. If |X| > 16380 log2, go to 3. #
7709: # #
7710: # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7711: # y = |X|, sgn = sign(X), and z = expm1(Y), #
7712: # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7713: # Exit. #
7714: # #
7715: # 3. If |X| > 16480 log2, go to 5. #
7716: # #
7717: # 4. (16380 log2 < |X| <= 16480 log2) #
7718: # sinh(X) = sign(X) * exp(|X|)/2. #
7719: # However, invoking exp(|X|) may cause premature overflow. #
7720: # Thus, we calculate sinh(X) as follows: #
7721: # Y := |X| #
7722: # sgn := sign(X) #
7723: # sgnFact := sgn * 2**(16380) #
7724: # Y' := Y - 16381 log2 #
7725: # sinh(X) := sgnFact * exp(Y'). #
7726: # Exit. #
7727: # #
7728: # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7729: # sign(X)*Huge*Huge to generate overflow and an infinity with #
7730: # the appropriate sign. Huge is the largest finite number in #
7731: # extended format. Exit. #
7732: # #
7733: #########################################################################
7734:
7735: global ssinh
7736: ssinh: # fp0 = sinh(X) for the normalized operand at (a0); d0 = caller's round precision,mode
7737: fmov.x (%a0),%fp0 # LOAD INPUT
7738:
7739: mov.l (%a0),%d1 # d1 upper half = sign/exponent word
7740: mov.w 4(%a0),%d1 # d1 lower half = top 16 mantissa bits (compact form of X)
7741: mov.l %d1,%a1 # save (compacted) operand; its sign bit is reused below
7742: and.l &0x7FFFFFFF,%d1 # drop the sign: compact form of |X|
7743: cmp.l %d1,&0x400CB167 # threshold for |X| > 16380 log2 (step 1 of the algorithm)
7744: bgt.b SINHBIG
7745:
7746: #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7747: #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7748:
7749: fabs.x %fp0 # Y = |X|
7750:
7751: movm.l &0x8040,-(%sp) # save {a1/d0} across the call
7752: fmovm.x &0x01,-(%sp) # save Y on stack
7753: lea (%sp),%a0 # pass ptr to Y
7754: clr.l %d0 # call setoxm1 with d0 = 0
7755: bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7756: add.l &0xc,%sp # clear Y from stack
7757: fmov.l &0,%fpcr # zero the FPCR for the intermediate arithmetic below
7758: movm.l (%sp)+,&0x0201 # restore {a1/d0}
7759:
7760: fmov.x %fp0,%fp1
7761: fadd.s &0x3F800000,%fp1 # 1+Z
7762: fmov.x %fp0,-(%sp) # push Z (12 bytes) for the add below
7763: fdiv.x %fp1,%fp0 # Z/(1+Z)
7764: mov.l %a1,%d1 # recover the compacted operand
7765: and.l &0x80000000,%d1 # keep only sign(X)
7766: or.l &0x3F000000,%d1 # d1 = sign(X)*(1/2) as a single-precision pattern
7767: fadd.x (%sp)+,%fp0 # pop Z: fp0 = Z + Z/(1+Z)
7768: mov.l %d1,-(%sp) # push sign(X)*(1/2) as the final multiplier
7769:
7770: fmov.l %d0,%fpcr # user's rounding applies only to the final multiply
7771: mov.b &FMUL_OP,%d1 # last inst is MUL (tag handed to t_catch in d1)
7772: fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7773: bra t_catch # exit through t_catch (defined elsewhere)
7774:
7775: SINHBIG:
7776: cmp.l %d1,&0x400CB2B3 # threshold for |X| > 16480 log2 (step 3 of the algorithm)
7777: bgt t_ovfl # beyond it: leave through the overflow exit (defined elsewhere)
7778: fabs.x %fp0
7779: fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7780: mov.l &0,-(%sp) # build sgnFact on the stack: low mantissa longword = 0
7781: mov.l &0x80000000,-(%sp) # high mantissa longword = 1.0
7782: mov.l %a1,%d1 # recover the compacted operand
7783: and.l &0x80000000,%d1 # keep only sign(X)
7784: or.l &0x7FFB0000,%d1 # sign(X) with the exponent of 2**16380
7785: mov.l %d1,-(%sp) # EXTENDED FMT: stack now holds sign(X)*2**16380
7786: fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7787:
7788: mov.l %d0,-(%sp) # save caller's round precision,mode across the call
7789: clr.l %d0 # call setox with d0 = 0
7790: fmovm.x &0x01,-(%sp) # save fp0 on stack
7791: lea (%sp),%a0 # pass ptr to fp0
7792: bsr setox # fp0 = exp(|X| - 16381 log2)
7793: add.l &0xc,%sp # clear fp0 from stack
7794:
7795: mov.l (%sp)+,%d0 # recover caller's round precision,mode
7796: fmov.l %d0,%fpcr # user's rounding applies only to the final multiply
7797: mov.b &FMUL_OP,%d1 # last inst is MUL (tag handed to t_catch in d1)
7798: fmul.x (%sp)+,%fp0 # pop and multiply by sign(X)*2**16380; possible exception
7799: bra t_catch # exit through t_catch (defined elsewhere)
7800:
7801: global ssinhd
7802: #--SINH(X) = X FOR DENORMALIZED X
7803: ssinhd:
7804: bra t_extdnrm # hand the denormalized operand to t_extdnrm (defined elsewhere)
7805:
7806: #########################################################################
7807: # stanh(): computes the hyperbolic tangent of a normalized input #
7808: # stanhd(): computes the hyperbolic tangent of a denormalized input #
7809: # #
7810: # INPUT *************************************************************** #
7811: # a0 = pointer to extended precision input #
7812: # d0 = round precision,mode #
7813: # #
7814: # OUTPUT ************************************************************** #
7815: # fp0 = tanh(X) #
7816: # #
7817: # ACCURACY and MONOTONICITY ******************************************* #
7818: # The returned result is within 3 ulps in 64 significant bit, #
7819: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7820: # rounded to double precision. The result is provably monotonic #
7821: # in double precision. #
7822: # #
7823: # ALGORITHM *********************************************************** #
7824: # #
7825: # TANH #
7826: # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7827: # #
7828: # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7829: # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7830: # tanh(X) = sgn*( z/(2+z) ). #
7831: # Exit. #
7832: # #
7833: # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7834: # go to 7. #
7835: # #
7836: # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7837: # #
7838: # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7839: # sgn := sign(X), y := 2|X|, z := exp(Y), #
7840: # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7841: # Exit. #
7842: # #
7843: # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7844: # calculate Tanh(X) by #
7845: # sgn := sign(X), Tiny := 2**(-126), #
7846: # tanh(X) := sgn - sgn*Tiny. #
7847: # Exit. #
7848: # #
7849: # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7850: # #
7851: #########################################################################
7852:
7853: set X,FP_SCR0 # scratch copy of the operand; later rewritten as Y = 2|X|
7854: set XFRAC,X+4 # offset of the mantissa longwords within X
7855:
7856: set SGN,L_SCR3 # longword holding only the sign bit of X
7857:
7858: set V,FP_SCR0 # Z+2 with the sign of X applied; shares FP_SCR0 with X
7859:
7860: global stanh
7861: stanh: # fp0 = tanh(X) for the normalized operand at (a0); d0 = caller's round precision,mode
7862: fmov.x (%a0),%fp0 # LOAD INPUT
7863:
7864: fmov.x %fp0,X(%a6) # keep a copy of the operand in X
7865: mov.l (%a0),%d1 # d1 upper half = sign/exponent word
7866: mov.w 4(%a0),%d1 # d1 lower half = top 16 mantissa bits (compact form of X)
7867: mov.l %d1,X(%a6) # first longword of X now holds the compact form
7868: and.l &0x7FFFFFFF,%d1 # drop the sign: compact form of |X|
7869: cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7870: blt.w TANHBORS # yes
7871: cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7872: bgt.w TANHBORS # yes
7873:
7874: #--THIS IS THE USUAL CASE
7875: #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7876:
7877: mov.l X(%a6),%d1 # reload the compact form (sign still present)
7878: mov.l %d1,SGN(%a6) # SGN gets a copy; reduced to the sign bit below
7879: and.l &0x7FFF0000,%d1 # keep the biased exponent only (sign and low word cleared)
7880: add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7881: mov.l %d1,X(%a6) # X is now Y = 2|X|
7882: and.l &0x80000000,SGN(%a6) # SGN = sign bit of X
7883: fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7884:
7885: mov.l %d0,-(%sp) # save caller's round precision,mode across the call
7886: clr.l %d0 # call setoxm1 with d0 = 0
7887: fmovm.x &0x1,-(%sp) # save Y on stack
7888: lea (%sp),%a0 # pass ptr to Y
7889: bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7890: add.l &0xc,%sp # clear Y from stack
7891: mov.l (%sp)+,%d0 # recover caller's round precision,mode
7892:
7893: fmov.x %fp0,%fp1
7894: fadd.s &0x40000000,%fp1 # Z+2
7895: mov.l SGN(%a6),%d1 # d1 = sign bit of X
7896: fmov.x %fp1,V(%a6) # V = Z+2
7897: eor.l %d1,V(%a6) # flip V's sign bit when X is negative, so Z/V takes the sign of X
7898:
7899: fmov.l %d0,%fpcr # restore users round prec,mode
7900: fdiv.x V(%a6),%fp0 # fp0 = Z / (sign-adjusted Z+2)
7901: bra t_inx2 # exit through t_inx2 (defined elsewhere)
7902:
7903: TANHBORS: # |X| is either below 2^(-40) or above (5/2) log2
7904: cmp.l %d1,&0x3FFF8000 # is |X| < 1?
7905: blt.w TANHSM # yes: this is the small-argument case
7906:
7907: cmp.l %d1,&0x40048AA1 # threshold for |X| > 50 log2 (step 4 of the algorithm)
7908: bgt.w TANHHUGE
7909:
7910: #-- (5/2) LOG2 < |X| < 50 LOG2,
7911: #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
7912: #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
7913:
7914: mov.l X(%a6),%d1 # reload the compact form (sign still present)
7915: mov.l %d1,SGN(%a6) # SGN gets a copy; reduced to the sign bit below
7916: and.l &0x7FFF0000,%d1 # keep the biased exponent only
7917: add.l &0x00010000,%d1 # EXPO OF 2|X|
7918: mov.l %d1,X(%a6) # Y = 2|X|
7919: and.l &0x80000000,SGN(%a6) # SGN = sign bit of X
7920: mov.l SGN(%a6),%d1 # d1 is loaded from SGN again after the call below
7921: fmov.x X(%a6),%fp0 # Y = 2|X|
7922:
7923: mov.l %d0,-(%sp) # save caller's round precision,mode across the call
7924: clr.l %d0 # call setox with d0 = 0
7925: fmovm.x &0x01,-(%sp) # save Y on stack
7926: lea (%sp),%a0 # pass ptr to Y
7927: bsr setox # FP0 IS EXP(Y)
7928: add.l &0xc,%sp # clear Y from stack
7929: mov.l (%sp)+,%d0 # recover caller's round precision,mode
7930: mov.l SGN(%a6),%d1 # d1 = sign bit of X
7931: fadd.s &0x3F800000,%fp0 # EXP(Y)+1
7932:
7933: eor.l &0xC0000000,%d1 # -SIGN(X)*2 (flip the sign bit, set bit 30: single-precision +-2.0)
7934: fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
7935: fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
7936:
7937: mov.l SGN(%a6),%d1 # d1 = sign bit of X
7938: or.l &0x3F800000,%d1 # SGN (sign bit combined with the pattern of 1.0)
7939: fmov.s %d1,%fp0 # SGN IN SGL FMT
7940:
7941: fmov.l %d0,%fpcr # restore users round prec,mode
7942: mov.b &FADD_OP,%d1 # last inst is ADD
7943: fadd.x %fp1,%fp0 # SGN - SGN*2/[EXP(Y)+1]
7944: bra t_inx2 # exit through t_inx2 (defined elsewhere)
7945:
7946: TANHSM: # |X| < 2^(-40): the operand itself is returned
7947: fmov.l %d0,%fpcr # restore users round prec,mode
7948: mov.b &FMOV_OP,%d1 # last inst is MOVE (tag handed to t_catch in d1)
7949: fmov.x X(%a6),%fp0 # last inst - possible exception set
7950: bra t_catch # exit through t_catch (defined elsewhere)
7951:
7952: #---RETURN SGN(X) - SGN(X)EPS
7953: TANHHUGE:
7954: mov.l X(%a6),%d1 # compact form of X (sign in bit 31)
7955: and.l &0x80000000,%d1 # keep only sign(X)
7956: or.l &0x3F800000,%d1 # single-precision +-1.0 with the sign of X
7957: fmov.s %d1,%fp0 # fp0 = SGN(X)
7958: and.l &0x80000000,%d1 # back to the bare sign bit
7959: eor.l &0x80800000,%d1 # -SIGN(X)*EPS, EPS = 2**(-126) as a single
7960:
7961: fmov.l %d0,%fpcr # restore users round prec,mode
7962: fadd.s %d1,%fp0 # SGN(X) - SGN(X)*EPS
7963: bra t_inx2 # exit through t_inx2 (defined elsewhere)
7964:
7965: global stanhd
7966: #--TANH(X) = X FOR DENORMALIZED X
7967: stanhd:
7968: bra t_extdnrm # hand the denormalized operand to t_extdnrm (defined elsewhere)
7969:
7970: #########################################################################
7971: # slogn(): computes the natural logarithm of a normalized input #
7972: # slognd(): computes the natural logarithm of a denormalized input #
7973: # slognp1(): computes the log(1+X) of a normalized input #
7974: # slognp1d(): computes the log(1+X) of a denormalized input #
7975: # #
7976: # INPUT *************************************************************** #
7977: # a0 = pointer to extended precision input #
7978: # d0 = round precision,mode #
7979: # #
7980: # OUTPUT ************************************************************** #
7981: # fp0 = log(X) or log(1+X) #
7982: # #
7983: # ACCURACY and MONOTONICITY ******************************************* #
7984: # The returned result is within 2 ulps in 64 significant bit, #
7985: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7986: # rounded to double precision. The result is provably monotonic #
7987: # in double precision. #
7988: # #
7989: # ALGORITHM *********************************************************** #
7990: # LOGN: #
7991: # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
7992: # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
7993: # move on to Step 2. #
7994: # #
7995: # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
7996: # seven significant bits of Y plus 2**(-7), i.e. #
7997: # F = 1.xxxxxx1 in base 2 where the six "x" match those #
7998: # of Y. Note that |Y-F| <= 2**(-7). #
7999: # #
8000: # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8001: # polynomial in u, log(1+u) = poly. #
8002: # #
8003: # Step 4. Reconstruct #
8004: # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8005: # by k*log(2) + (log(F) + poly). The values of log(F) are #
8006: # calculated beforehand and stored in the program. #
8007: # #
8008: # lognp1: #
8009: # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8010: # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8011: # to Step 2. #
8012: # #
8013: # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8014: # in Step 2 of the algorithm for LOGN and compute #
8015: # log(1+X) as k*log(2) + log(F) + poly where poly #
8016: # approximates log(1+u), u = (Y-F)/F. #
8017: # #
8018: # Implementation Notes: #
8019: # Note 1. There are 64 different possible values for F, thus 64 #
8020: # log(F)'s need to be tabulated. Moreover, the values of #
8021: # 1/F are also tabulated so that the division in (Y-F)/F #
8022: # can be performed by a multiplication. #
8023: # #
8024: # Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8025: # the value Y-F has to be calculated carefully when #
8026: # 1/2 <= X < 3/2. #
8027: # #
8028: # Note 3. To fully exploit the pipeline, polynomials are usually #
8029: # separated into two parts evaluated independently before #
8030: # being added up. #
8031: # #
8032: #########################################################################
8033: LOGOF2: # extended-precision multiplier that turns K into K*LOG2 in LP1CONT1
8034: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # approx. 0.693147 (natural log of 2); 4th longword is not part of the 12-byte value
8035:
8036: one: # single-precision 1.0
8037: long 0x3F800000
8038: zero: # single-precision +0.0
8039: long 0x00000000
8040: infty: # single-precision +infinity
8041: long 0x7F800000
8042: negone: # single-precision -1.0
8043: long 0xBF800000
8044:
8045: LOGA6: # LOGA1..LOGA6: double-precision coefficients A1..A6 of the LP1CONT1 polynomial
8046: long 0x3FC2499A,0xB5E4040B # approx. 1/7
8047: LOGA5:
8048: long 0xBFC555B5,0x848CB7DB # approx. -1/6
8049:
8050: LOGA4:
8051: long 0x3FC99999,0x987D8730 # approx. 1/5
8052: LOGA3:
8053: long 0xBFCFFFFF,0xFF6F7E97 # approx. -1/4
8054:
8055: LOGA2:
8056: long 0x3FD55555,0x555555A4 # approx. 1/3
8057: LOGA1:
8058: long 0xBFE00000,0x00000008 # approx. -1/2
8059:
8060: LOGB5: # LOGB1..LOGB5: double-precision coefficients B1..B5 of the LP1CONT2 polynomial
8061: long 0x3F175496,0xADD7DAD6
8062: LOGB4:
8063: long 0x3F3C71C2,0xFE80C7E0
8064:
8065: LOGB3:
8066: long 0x3F624924,0x928BCCFF
8067: LOGB2:
8068: long 0x3F899999,0x999995EC # approx. 1/80
8069:
8070: LOGB1:
8071: long 0x3FB55555,0x55555555 # approx. 1/12
8072: TWO: # 2.0; KISNEG1 reads the first longword with fmov.s
8073: long 0x40000000,0x00000000
8074:
8075: LTHOLD: # slognp1 threshold: extended-precision 2**(-102) (biased exponent $3f99)
8076: long 0x3f990000,0x80000000,0x00000000,0x00000000
8077:
8078: LOGTBL: # 64 pairs of 16-byte extended-precision entries: 1/F, then log(F) 16 bytes later; LOGMAIN indexes pairs in 32-byte steps
8079: long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 # pair 0: 1/F, approx. 128/129
8080: long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 # pair 0: log(F)
8081: long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8082: long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8083: long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8084: long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8085: long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8086: long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8087: long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8088: long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8089: long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8090: long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8091: long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8092: long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8093: long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8094: long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8095: long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8096: long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8097: long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8098: long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8099: long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8100: long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8101: long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8102: long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8103: long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8104: long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8105: long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8106: long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8107: long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8108: long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8109: long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8110: long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8111: long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8112: long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8113: long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8114: long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8115: long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8116: long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8117: long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8118: long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8119: long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8120: long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8121: long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8122: long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8123: long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8124: long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8125: long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8126: long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8127: long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8128: long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8129: long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8130: long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8131: long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8132: long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8133: long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8134: long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8135: long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8136: long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8137: long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8138: long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8139: long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8140: long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8141: long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8142: long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8143: long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8144: long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8145: long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8146: long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8147: long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8148: long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8149: long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8150: long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8151: long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8152: long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8153: long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8154: long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8155: long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8156: long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8157: long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8158: long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8159: long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8160: long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8161: long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8162: long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8163: long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8164: long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8165: long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8166: long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8167: long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8168: long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8169: long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8170: long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8171: long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8172: long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8173: long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8174: long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8175: long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8176: long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8177: long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8178: long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8179: long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8180: long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8181: long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8182: long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8183: long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8184: long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8185: long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8186: long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8187: long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8188: long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8189: long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8190: long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8191: long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8192: long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8193: long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8194: long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8195: long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8196: long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8197: long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8198: long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8199: long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8200: long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8201: long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8202: long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8203: long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8204: long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8205: long 0x3FFE0000,0x80808080,0x80808081,0x00000000 # pair 63: 1/F, approx. 128/255
8206: long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 # pair 63: log(F)
8207:
8208: set ADJK,L_SCR1 # exponent adjustment added to K (0 for normalized input, -k after slognd normalizes)
8209:
8210: set X,FP_SCR0 # working copy of the argument
8211: set XDCARE,X+2 # word following the sign/exponent word of X
8212: set XFRAC,X+4 # upper mantissa longword of X
8213:
8214: set F,FP_SCR1 # the rounded value F built from the leading mantissa bits
8215: set FFRAC,F+4 # upper mantissa longword of F
8216:
8217: set KLOG2,FP_SCR0 # K*LOG2; shares FP_SCR0 with X
8218:
8219: set SAVEU,FP_SCR0 # U in the near-1 path; shares FP_SCR0 with X
8220:
8221: global slogn
8222: #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8223: slogn:
8224: fmov.x (%a0),%fp0 # LOAD INPUT
8225: mov.l &0x00000000,ADJK(%a6) # no exponent adjustment for a normalized input
8226:
8227: LOGBGN:
8228: #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8229: #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8230:
8231: mov.l (%a0),%d1 # d1 upper half = sign/exponent word
8232: mov.w 4(%a0),%d1 # d1 lower half = top 16 mantissa bits (compact form of X)
8233:
8234: mov.l (%a0),X(%a6) # copy the 12-byte operand into X
8235: mov.l 4(%a0),X+4(%a6)
8236: mov.l 8(%a0),X+8(%a6)
8237:
8238: cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8239: blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8240: # X IS POSITIVE, CHECK IF X IS NEAR 1
8241: cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8242: blt.b LOGMAIN # YES
8243: cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8244: ble.w LOGNEAR1 # NO
8245:
8246: LOGMAIN:
8247: #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8248:
8249: #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8250: #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8251: #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8252: #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8253: #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8254: #--LOG(1+U) CAN BE VERY EFFICIENT.
8255: #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8256: #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8257:
8258: #--GET K, Y, F, AND ADDRESS OF 1/F.
8259: asr.l &8,%d1
8260: asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8261: sub.l &0x3FFF,%d1 # THIS IS K
8262: add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8263: lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8264: fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8265:
8266: #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8267: mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8268: mov.l XFRAC(%a6),FFRAC(%a6) # start F from the upper mantissa longword of Y
8269: and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8270: or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8271: mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8272: and.l &0x7E000000,%d1 # keep the six bits below the leading 1
8273: asr.l &8,%d1
8274: asr.l &8,%d1
8275: asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT (six-bit index * 32 bytes per table pair)
8276: add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8277:
8278: fmov.x X(%a6),%fp0 # fp0 = Y
8279: mov.l &0x3fff0000,F(%a6) # F gets the same exponent as Y
8280: clr.l F+8(%a6) # low mantissa longword of F is zero
8281: fsub.x F(%a6),%fp0 # Y-F
8282: fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8283: #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8284: #--REGISTERS SAVED: FPCR; FP2-3 ARE ON THE STACK
8285:
8286: LP1CONT1:
8287: #--A RE-ENTRY POINT FOR LOGNP1
8288: fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8289: fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8290: fmov.x %fp0,%fp2
8291: fmul.x %fp2,%fp2 # FP2 IS V=U*U
8292: fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8293:
8294: #--LOG(1+U) IS APPROXIMATED BY
8295: #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8296: #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8297:
8298: fmov.x %fp2,%fp3 # fp3 = V, kept as the multiplier
8299: fmov.x %fp2,%fp1 # fp1 = V, accumulates the A2/A4/A6 half
8300:
8301: fmul.d LOGA6(%pc),%fp1 # V*A6
8302: fmul.d LOGA5(%pc),%fp2 # V*A5
8303:
8304: fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8305: fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8306:
8307: fmul.x %fp3,%fp1 # V*(A4+V*A6)
8308: fmul.x %fp3,%fp2 # V*(A3+V*A5)
8309:
8310: fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8311: fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8312:
8313: fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8314: add.l &16,%a0 # ADDRESS OF LOG(F)
8315: fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8316:
8317: fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8318: fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8319:
8320: fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8321: fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8322: fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8323:
8324: fmov.l %d0,%fpcr # install caller's round precision,mode for the final add only
8325: fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8326: bra t_inx2 # exit through t_inx2 (defined elsewhere)
8327:
8328:
8329: LOGNEAR1: # reached from LOGBGN when X lies inside the 15/16 .. 17/16 window
8330:
8331: # if the input is exactly equal to one, then exit through ld_pzero.
8332: # if these 2 lines weren't here, the correct answer would be returned
8333: # but the INEX2 bit would be set.
8334: fcmp.b %fp0,&0x1 # is it equal to one?
8335: fbeq.l ld_pzero # yes
8336:
8337: #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8338: fmov.x %fp0,%fp1
8339: fsub.s one(%pc),%fp1 # FP1 IS X-1
8340: fadd.s one(%pc),%fp0 # FP0 IS X+1
8341: fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8342: #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8343: #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8344:
8345: LP1CONT2:
8346: #--THIS IS A RE-ENTRY POINT FOR LOGNP1
8347: fdiv.x %fp0,%fp1 # FP1 IS U
8348: fmovm.x &0xc,-(%sp) # SAVE FP2-3
8349: #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8350: #--LET V=U*U, W=V*V, CALCULATE
8351: #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8352: #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8353: fmov.x %fp1,%fp0
8354: fmul.x %fp0,%fp0 # FP0 IS V
8355: fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8356: fmov.x %fp0,%fp1
8357: fmul.x %fp1,%fp1 # FP1 IS W
8358:
8359: fmov.d LOGB5(%pc),%fp3 # fp3 = B5
8360: fmov.d LOGB4(%pc),%fp2 # fp2 = B4
8361:
8362: fmul.x %fp1,%fp3 # W*B5
8363: fmul.x %fp1,%fp2 # W*B4
8364:
8365: fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8366: fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8367:
8368: fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8369:
8370: fmul.x %fp0,%fp2 # V*(B2+W*B4)
8371:
8372: fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8373: fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8374:
8375: fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8376: fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8377:
8378: fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8379:
8380: fmov.l %d0,%fpcr # install caller's round precision,mode for the final add only
8381: fadd.x SAVEU(%a6),%fp0 # add U: fp0 = U + U*V*(...)
8382: bra t_inx2 # exit through t_inx2 (defined elsewhere)
8383:
8384: #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8385: LOGNEG:
8386: bra t_operr # negative argument: leave through t_operr (defined elsewhere)
8387:
8388: global slognd
8389: slognd:
8390: #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8391:
8392: mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0; this value is replaced by -k on both paths below
8393:
8394: #----normalize the input value by left shifting k bits (k to be determined
8395: #----below), adjusting exponent and storing -k to ADJK
8396: #----the value TWOTO100 is no longer needed.
8397: #----Note that this code assumes the denormalized input is NON-ZERO.
8398:
8399: movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8400: mov.l (%a0),%d3 # D3 is exponent of smallest norm. (first longword of the input, stored back unchanged)
8401: mov.l 4(%a0),%d4
8402: mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8403: clr.l %d2 # D2 used for holding K
8404:
8405: tst.l %d4 # is the upper mantissa longword all zero?
8406: bne.b Hi_not0
8407:
8408: Hi_0: # upper longword is zero: the shift is at least 32
8409: mov.l %d5,%d4 # move the lower longword up
8410: clr.l %d5
8411: mov.l &32,%d2 # k starts at 32
8412: clr.l %d6
8413: bfffo %d4{&0:&32},%d6 # d6 = offset of the first 1 bit in d4
8414: lsl.l %d6,%d4 # bring that bit to the top
8415: add.l %d6,%d2 # (D3,D4,D5) is normalized
8416:
8417: mov.l %d3,X(%a6) # store the normalized value into X
8418: mov.l %d4,XFRAC(%a6)
8419: mov.l %d5,XFRAC+4(%a6)
8420: neg.l %d2
8421: mov.l %d2,ADJK(%a6) # ADJK = -k
8422: fmov.x X(%a6),%fp0 # fp0 = normalized value
8423: movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8424: lea X(%a6),%a0 # LOGBGN reads its operand through a0
8425: bra.w LOGBGN # begin regular log(X)
8426:
8427: Hi_not0: # upper longword is non-zero: the shift is less than 32
8428: clr.l %d6
8429: bfffo %d4{&0:&32},%d6 # find first 1
8430: mov.l %d6,%d2 # get k
8431: lsl.l %d6,%d4 # shift the upper longword left by k
8432: mov.l %d5,%d7 # a copy of D5
8433: lsl.l %d6,%d5 # shift the lower longword left by k
8434: neg.l %d6
8435: add.l &32,%d6 # d6 = 32 - k
8436: lsr.l %d6,%d7 # bits of the lower longword that cross into the upper one
8437: or.l %d7,%d4 # (D3,D4,D5) normalized
8438:
8439: mov.l %d3,X(%a6) # store the normalized value into X
8440: mov.l %d4,XFRAC(%a6)
8441: mov.l %d5,XFRAC+4(%a6)
8442: neg.l %d2
8443: mov.l %d2,ADJK(%a6) # ADJK = -k
8444: fmov.x X(%a6),%fp0 # fp0 = normalized value
8445: movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8446: lea X(%a6),%a0 # LOGBGN reads its operand through a0
8447: bra.w LOGBGN # begin regular log(X)
8448:
8449: global slognp1
8450: #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8451: slognp1:
8452: fmov.x (%a0),%fp0 # LOAD INPUT
8453: fabs.x %fp0 # test magnitude
8454: fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8455: fbgt.w LP1REAL # if greater, continue
8456: fmov.l %d0,%fpcr
8457: mov.b &FMOV_OP,%d1 # last inst is MOVE
8458: fmov.x (%a0),%fp0 # return signed argument
8459: bra t_catch
8460:
8461: LP1REAL: # |Z| is above LTHOLD: form X = 1+Z and choose an evaluation path
8462: fmov.x (%a0),%fp0 # LOAD INPUT
8463: mov.l &0x00000000,ADJK(%a6) # ADJK = 0
8464: fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8465: fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8466: fmov.x %fp0,X(%a6)
8467: mov.w XFRAC(%a6),XDCARE(%a6) # copy the top mantissa word next to the exponent word
8468: mov.l X(%a6),%d1 # D1 IS X IN COMPACT FORM
8469: cmp.l %d1,&0
8470: ble.w LP1NEG0 # LOG OF ZERO OR -VE
8471: cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8472: blt.w LOGMAIN # X < 1/2
8473: cmp.l %d1,&0x3fffc000 # compact form of 3/2
8474: bgt.w LOGMAIN # X > 3/2
8475: #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8476: #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8477: #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). OTHERWISE FALL THROUGH TO LP1NEAR1.
8478:
8479: LP1NEAR1: # X is within [1/2,3/2]; D1 is X in compact form
8480: #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8481: cmp.l %d1,&0x3ffef07d # compact form of the lower bound
8482: blt.w LP1CARE # below it: use the table-driven path
8483: cmp.l %d1,&0x3fff8841 # compact form of the upper bound
8484: bgt.w LP1CARE # above it: use the table-driven path
8485:
8486: LP1ONE16: # on entry FP0 is X = ROUND(1+Z) and FP1 is Z
8487: #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8488: #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8489: fadd.x %fp1,%fp1 # FP1 IS 2Z
8490: fadd.s one(%pc),%fp0 # FP0 IS 1+X
8491: #--U = FP1/FP0 (the division is not performed here; presumably done at LP1CONT2)
8492: bra.w LP1CONT2
8493:
8494: LP1CARE: # on entry D1 is X in compact form and FP1 is Z
8495: #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8496: #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8497: #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8498: #--THERE ARE ONLY TWO CASES.
8499: #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8500: #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8501: #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8502: #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8503:
8504: mov.l XFRAC(%a6),FFRAC(%a6) # start from the high mantissa longword of X
8505: and.l &0xFE000000,FFRAC(%a6) # keep only its top 7 bits
8506: or.l &0x01000000,FFRAC(%a6) # F OBTAINED (bit 24 forced to 1)
8507: cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z >= 1 (compact form of 1.0)
8508: bge.b KISZERO # yes: K = 0; otherwise fall through to KISNEG1
8509:
8510: KISNEG1: # case 1+Z < 1: K = -1 and Y-F = (2-F) + 2Z
8511: fmov.s TWO(%pc),%fp0 # FP0 = 2
8512: mov.l &0x3fff0000,F(%a6) # first longword of F: positive, exponent 0x3fff
8513: clr.l F+8(%a6) # clear the last longword of F
8514: fsub.x F(%a6),%fp0 # 2-F
8515: mov.l FFRAC(%a6),%d1
8516: and.l &0x7E000000,%d1 # keep 6 bits of F's mantissa as the table index
8517: asr.l &8,%d1
8518: asr.l &8,%d1
8519: asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F (index shifted right 20 in total)
8520: fadd.x %fp1,%fp1 # GET 2Z
8521: fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8522: fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8523: lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8524: add.l %d1,%a0 # add the displacement to the table base
8525: fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8526: bra.w LP1CONT1
8527:
8528: KISZERO: # case 1+Z >= 1: K = 0 and Y-F = (1-F) + Z
8529: fmov.s one(%pc),%fp0 # FP0 = 1
8530: mov.l &0x3fff0000,F(%a6) # first longword of F: positive, exponent 0x3fff
8531: clr.l F+8(%a6) # clear the last longword of F
8532: fsub.x F(%a6),%fp0 # 1-F
8533: mov.l FFRAC(%a6),%d1
8534: and.l &0x7E000000,%d1 # keep 6 bits of F's mantissa as the table index
8535: asr.l &8,%d1
8536: asr.l &8,%d1
8537: asr.l &4,%d1 # D1 is the displacement for 1/F (index shifted right 20 in total)
8538: fadd.x %fp1,%fp0 # FP0 IS Y-F
8539: fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8540: lea LOGTBL(%pc),%a0
8541: add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8542: fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8543: bra.w LP1CONT1
8544:
8545: LP1NEG0: # reached from LP1REAL when X = ROUND(1+Z) is zero or negative
8546: #--D1 IS X IN COMPACT FORM. D0 HOLDS THE VALUE TO BE LOADED INTO FPCR.
8547: cmp.l %d1,&0
8548: blt.b LP1NEG # X < 0
8549: LP1ZERO: # X = 0
8550: fmov.s negone(%pc),%fp0 # FP0 = -1
8551:
8552: fmov.l %d0,%fpcr # load FPCR from D0
8553: bra t_dz # t_dz is defined elsewhere; presumably the divide-by-zero handler
8554:
8555: LP1NEG: # X = ROUND(1+Z) is negative
8556: fmov.s zero(%pc),%fp0 # FP0 = 0
8557:
8558: fmov.l %d0,%fpcr # load FPCR from D0
8559: bra t_operr # t_operr is defined elsewhere; presumably the operand-error handler
8560:
8561: global slognp1d
8562: #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8563: # Simply return the denorm
8564: slognp1d:
8565: bra t_extdnrm # all work is delegated to t_extdnrm (defined elsewhere)
8566:
8567: #########################################################################
8568: # satanh(): computes the inverse hyperbolic tangent of a norm input #
8569: # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8570: # #
8571: # INPUT *************************************************************** #
8572: # a0 = pointer to extended precision input #
8573: # d0 = round precision,mode #
8574: # #
8575: # OUTPUT ************************************************************** #
8576: # fp0 = arctanh(X) #
8577: # #
8578: # ACCURACY and MONOTONICITY ******************************************* #
8579: # The returned result is within 3 ulps in 64 significant bit, #
8580: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8581: # rounded to double precision. The result is provably monotonic #
8582: # in double precision. #
8583: # #
8584: # ALGORITHM *********************************************************** #
8585: # #
8586: # ATANH #
8587: # 1. If |X| >= 1, go to 3. #
8588: # #
8589: # 2. (|X| < 1) Calculate atanh(X) by #
8590: # sgn := sign(X) #
8591: # y := |X| #
8592: # z := 2y/(1-y) #
8593: # atanh(X) := sgn * (1/2) * logp1(z) #
8594: # Exit. #
8595: # #
8596: # 3. If |X| > 1, go to 5. #
8597: # #
8598: # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8599: # divide-by-zero by #
8600: # sgn := sign(X) #
8601: # atanh(X) := sgn / (+0). #
8602: # Exit. #
8603: # #
8604: # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8605: # Exit. #
8606: # #
8607: #########################################################################
8608:
8609: global satanh
8610: satanh: # atanh(X) for normalized X; a0 = ptr to X, d0 = round precision,mode
8611: mov.l (%a0),%d1 # D1 high word = sign/exponent word of X
8612: mov.w 4(%a0),%d1 # D1 low word = top 16 bits of the mantissa
8613: and.l &0x7FFFFFFF,%d1 # drop the sign: D1 is |X| in compact form
8614: cmp.l %d1,&0x3FFF8000 # compare with the compact form of 1.0
8615: bge.b ATANHBIG # |X| >= 1
8616:
8617: #--THIS IS THE USUAL CASE, |X| < 1
8618: #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8619:
8620: fabs.x (%a0),%fp0 # Y = |X|
8621: fmov.x %fp0,%fp1
8622: fneg.x %fp1 # -Y
8623: fadd.x %fp0,%fp0 # 2Y
8624: fadd.s &0x3F800000,%fp1 # 1-Y
8625: fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8626: mov.l (%a0),%d1
8627: and.l &0x80000000,%d1 # isolate the sign bit of X
8628: or.l &0x3F000000,%d1 # SIGN(X)*HALF
8629: mov.l %d1,-(%sp) # push it as a single-precision value for the final fmul.s
8630:
8631: mov.l %d0,-(%sp) # save rnd prec,mode
8632: clr.l %d0 # pass ext prec,RN
8633: fmovm.x &0x01,-(%sp) # save Z on stack
8634: lea (%sp),%a0 # pass ptr to Z
8635: bsr slognp1 # LOG1P(Z)
8636: add.l &0xc,%sp # clear Z from stack
8637:
8638: mov.l (%sp)+,%d0 # fetch old prec,mode
8639: fmov.l %d0,%fpcr # load it
8640: mov.b &FMUL_OP,%d1 # last inst is MUL
8641: fmul.s (%sp)+,%fp0 # multiply by SIGN(X)*HALF, popping it off the stack
8642: bra t_catch
8643:
8644: ATANHBIG: # |X| >= 1
8645: fabs.x (%a0),%fp0 # |X|
8646: fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
8647: fbgt t_operr # |X| > 1
8648: bra t_dz # |X| = 1
8649:
8650: global satanhd
8651: #--ATANH(X) = X FOR DENORMALIZED X
8652: satanhd:
8653: bra t_extdnrm # all work is delegated to t_extdnrm (defined elsewhere)
8654:
8655: #########################################################################
8656: # slog10(): computes the base-10 logarithm of a normalized input #
8657: # slog10d(): computes the base-10 logarithm of a denormalized input #
8658: # slog2(): computes the base-2 logarithm of a normalized input #
8659: # slog2d(): computes the base-2 logarithm of a denormalized input #
8660: # #
8661: # INPUT *************************************************************** #
8662: # a0 = pointer to extended precision input #
8663: # d0 = round precision,mode #
8664: # #
8665: # OUTPUT ************************************************************** #
8666: # fp0 = log_10(X) or log_2(X) #
8667: # #
8668: # ACCURACY and MONOTONICITY ******************************************* #
8669: # The returned result is within 1.7 ulps in 64 significant bit, #
8670: # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8671: # rounded to double precision. The result is provably monotonic #
8672: # in double precision. #
8673: # #
8674: # ALGORITHM *********************************************************** #
8675: # #
8676: # slog10d: #
8677: # #
8678: # Step 0. If X < 0, create a NaN and raise the invalid operation #
8679: # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8680: # Notes: Default means round-to-nearest mode, no floating-point #
8681: # traps, and precision control = double extended. #
8682: # #
8683: # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8684: # Notes: Even if X is denormalized, log(X) is always normalized. #
8685: # #
8686: # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8687: # 2.1 Restore the user FPCR #
8688: # 2.2 Return ans := Y * INV_L10. #
8689: # #
8690: # slog10: #
8691: # #
8692: # Step 0. If X < 0, create a NaN and raise the invalid operation #
8693: # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8694: # Notes: Default means round-to-nearest mode, no floating-point #
8695: # traps, and precision control = double extended. #
8696: # #
8697: # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8698: # #
8699: # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8700: # 2.1 Restore the user FPCR #
8701: # 2.2 Return ans := Y * INV_L10. #
8702: # #
8703: # sLog2d: #
8704: # #
8705: # Step 0. If X < 0, create a NaN and raise the invalid operation #
8706: # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8707: # Notes: Default means round-to-nearest mode, no floating-point #
8708: # traps, and precision control = double extended. #
8709: # #
8710: # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8711: # Notes: Even if X is denormalized, log(X) is always normalized. #
8712: # #
8713: # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8714: # 2.1 Restore the user FPCR #
8715: # 2.2 Return ans := Y * INV_L2. #
8716: # #
8717: # sLog2: #
8718: # #
8719: # Step 0. If X < 0, create a NaN and raise the invalid operation #
8720: # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8721: # Notes: Default means round-to-nearest mode, no floating-point #
8722: # traps, and precision control = double extended. #
8723: # #
8724: # Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8725: # go to Step 3. #
8726: # #
8727: # Step 2. Return k. #
8728: # 2.1 Get integer k, X = 2^k. #
8729: # 2.2 Restore the user FPCR. #
8730: # 2.3 Return ans := convert-to-double-extended(k). #
8731: # #
8732: # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8733: # #
8734: # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8735: # 4.1 Restore the user FPCR #
8736: # 4.2 Return ans := Y * INV_L2. #
8737: # #
8738: #########################################################################
8739:
8740: INV_L10: # 1/log(10); read with fmul.x by slog10 and slog10d
8741: long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8742:
8743: INV_L2: # 1/log(2); read with fmul.x by slog2 (label "continue") and slog2d
8744: long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8745:
8746: global slog10
8747: #--entry point for Log10(X), X is normalized; a0 = ptr to X, d0 = round precision,mode
8748: slog10:
8749: fmov.b &0x1,%fp0 # FP0 = 1
8750: fcmp.x %fp0,(%a0) # if operand == 1,
8751: fbeq.l ld_pzero # return an EXACT zero
8752:
8753: mov.l (%a0),%d1 # the move sets N from the sign bit of X
8754: blt.w invalid # X < 0
8755: mov.l %d0,-(%sp) # save rnd prec,mode
8756: clr.l %d0 # call slogn with d0 = 0
8757: bsr slogn # log(X), X normal.
8758: fmov.l (%sp)+,%fpcr # pop the saved rnd prec,mode straight into FPCR
8759: fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8760: bra t_inx2
8761:
8762: global slog10d
8763: #--entry point for Log10(X), X is denormalized; a0 = ptr to X, d0 = round precision,mode
8764: slog10d:
8765: mov.l (%a0),%d1 # the move sets N from the sign bit of X
8766: blt.w invalid # X < 0
8767: mov.l %d0,-(%sp) # save rnd prec,mode
8768: clr.l %d0 # call slognd with d0 = 0
8769: bsr slognd # log(X), X denorm.
8770: fmov.l (%sp)+,%fpcr # pop the saved rnd prec,mode straight into FPCR
8771: fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8772: bra t_minx2
8773:
8774: global slog2
8775: #--entry point for Log2(X), X is normalized; a0 = ptr to X, d0 = round precision,mode
8776: slog2:
8777: mov.l (%a0),%d1 # the move sets N from the sign bit of X
8778: blt.w invalid # X < 0
8779:
8780: mov.l 8(%a0),%d1 # low mantissa longword
8781: bne.b continue # X is not 2^k
8782:
8783: mov.l 4(%a0),%d1 # high mantissa longword
8784: and.l &0x7FFFFFFF,%d1 # ignore the top mantissa bit
8785: bne.b continue # other mantissa bits set: X is not 2^k
8786:
8787: #--X = 2^k.
8788: mov.w (%a0),%d1 # sign/exponent word
8789: and.l &0x00007FFF,%d1 # keep the biased exponent only
8790: sub.l &0x3FFF,%d1 # remove the bias: D1 = k
8791: beq.l ld_pzero # k = 0, i.e. X = 1
8792: fmov.l %d0,%fpcr # load FPCR from D0
8793: fmov.l %d1,%fp0 # FP0 = k converted from integer
8794: bra t_inx2
8795:
8796: continue: # slog2 general case: X is not an exact power of two
8797: mov.l %d0,-(%sp) # save rnd prec,mode
8798: clr.l %d0 # call slogn with d0 = 0
8799: bsr slogn # log(X), X normal.
8800: fmov.l (%sp)+,%fpcr # pop the saved rnd prec,mode straight into FPCR
8801: fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8802: bra t_inx2
8803:
8804: invalid: # shared target for negative input in slog10, slog10d, slog2 and slog2d
8805: bra t_operr
8806:
8807: global slog2d
8808: #--entry point for Log2(X), X is denormalized; a0 = ptr to X, d0 = round precision,mode
8809: slog2d:
8810: mov.l (%a0),%d1 # the move sets N from the sign bit of X
8811: blt.w invalid # X < 0
8812: mov.l %d0,-(%sp) # save rnd prec,mode
8813: clr.l %d0 # call slognd with d0 = 0
8814: bsr slognd # log(X), X denorm.
8815: fmov.l (%sp)+,%fpcr # pop the saved rnd prec,mode straight into FPCR
8816: fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8817: bra t_minx2
8818:
8819: #########################################################################
8820: # stwotox(): computes 2**X for a normalized input #
8821: # stwotoxd(): computes 2**X for a denormalized input #
8822: # stentox(): computes 10**X for a normalized input #
8823: # stentoxd(): computes 10**X for a denormalized input #
8824: # #
8825: # INPUT *************************************************************** #
8826: # a0 = pointer to extended precision input #
8827: # d0 = round precision,mode #
8828: # #
8829: # OUTPUT ************************************************************** #
8830: # fp0 = 2**X or 10**X #
8831: # #
8832: # ACCURACY and MONOTONICITY ******************************************* #
8833: # The returned result is within 2 ulps in 64 significant bit, #
8834: # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8835: # rounded to double precision. The result is provably monotonic #
8836: # in double precision. #
8837: # #
8838: # ALGORITHM *********************************************************** #
8839: # #
8840: # twotox #
8841: # 1. If |X| > 16480, go to ExpBig. #
8842: # #
8843: # 2. If |X| < 2**(-70), go to ExpSm. #
8844: # #
8845: # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8846: # decompose N as #
8847: # N = 64(M + M') + j, j = 0,1,2,...,63. #
8848: # #
8849: # 4. Overwrite r := r * log2. Then #
8850: # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8851: # Go to expr to compute that expression. #
8852: # #
8853: # tentox #
8854: # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8855: # #
8856: # 2. If |X| < 2**(-70), go to ExpSm. #
8857: # #
8858: # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8859: # N := round-to-int(y). Decompose N as #
8860: # N = 64(M + M') + j, j = 0,1,2,...,63. #
8861: # #
8862: # 4. Define r as #
8863: # r := ((X - N*L1)-N*L2) * L10 #
8864: # where L1, L2 are the leading and trailing parts of #
8865: # log_10(2)/64 and L10 is the natural log of 10. Then #
8866: # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8867: # Go to expr to compute that expression. #
8868: # #
8869: # expr #
8870: # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8871: # #
8872: # 2. Overwrite Fact1 and Fact2 by #
8873: # Fact1 := 2**(M) * Fact1 #
8874: # Fact2 := 2**(M) * Fact2 #
8875: # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8876: # #
8877: # 3. Calculate P where 1 + P approximates exp(r): #
8878: # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8879: # #
8880: # 4. Let AdjFact := 2**(M'). Return #
8881: # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8882: # Exit. #
8883: # #
8884: # ExpBig #
8885: # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8886: # generate underflow by Tiny * Tiny. #
8887: # #
8888: # ExpSm #
8889: # 1. Return 1 + X. #
8890: # #
8891: #########################################################################
8892:
8893: L2TEN64: # read with fmul.d in TENMAIN
8894: long 0x406A934F,0x0979A371 # 64LOG10/LOG2
8895: L10TWO1: # read with fmul.d in TENMAIN: leading part
8896: long 0x3F734413,0x509F8000 # LOG2/64LOG10
8897:
8898: L10TWO2: # read with fmul.x in TENMAIN: trailing part of LOG2/64LOG10
8899: long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8900:
8901: LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 # natural log of 10; read with fmul.x in TENMAIN
8902:
8903: LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # natural log of 2; read with fmul.x in TWOMAIN
8904:
8905: EXPA5: long 0x3F56C16D,0x6F7BD0B2 # A5..A1 are the polynomial coefficients used by expr,
8906: EXPA4: long 0x3F811112,0x302C712C # each read there as a double (fmov.d / fadd.d)
8907: EXPA3: long 0x3FA55555,0x55554CC1
8908: EXPA2: long 0x3FC55555,0x55554A54
8909: EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 # expr reads this with fadd.d, so only the first two longwords are used
8910:
8911: TEXPTBL: # table of 2^(J/64), J = 0..63, 16 bytes per entry: three longwords copied to FACT1, then a longword whose two words go to FACT2 and FACT2HI
8912: long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
8913: long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
8914: long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
8915: long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
8916: long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
8917: long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
8918: long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
8919: long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
8920: long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
8921: long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
8922: long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
8923: long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
8924: long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
8925: long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
8926: long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
8927: long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
8928: long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
8929: long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
8930: long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
8931: long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
8932: long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
8933: long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
8934: long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
8935: long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
8936: long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8937: long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
8938: long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
8939: long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
8940: long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
8941: long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
8942: long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
8943: long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
8944: long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
8945: long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
8946: long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
8947: long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
8948: long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
8949: long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
8950: long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
8951: long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
8952: long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
8953: long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
8954: long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
8955: long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
8956: long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
8957: long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
8958: long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
8959: long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
8960: long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
8961: long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
8962: long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
8963: long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
8964: long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
8965: long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
8966: long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
8967: long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
8968: long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
8969: long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
8970: long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
8971: long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
8972: long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
8973: long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
8974: long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
8975: long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
8976:
8977: set INT,L_SCR1 # longword scratch for N = round-to-int result
8978:
8979: set X,FP_SCR0 # copy of the input X
8980: set XDCARE,X+2 # word following the sign/exponent word of X
8981: set XFRAC,X+4 # high mantissa longword of X
8982:
8983: set ADJFACT,FP_SCR0 # note: ADJFACT, FACT1 and X all alias FP_SCR0
8984:
8985: set FACT1,FP_SCR0 # leading part of 2^(J/64) taken from TEXPTBL
8986: set FACT1HI,FACT1+4
8987: set FACT1LOW,FACT1+8
8988:
8989: set FACT2,FP_SCR1 # trailing part of 2^(J/64) taken from TEXPTBL
8990: set FACT2HI,FACT2+4
8991: set FACT2LOW,FACT2+8
8992:
8993: global stwotox
8994: #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S. A0 POINTS TO X; D0 IS ROUND PRECISION,MODE.
8995: stwotox:
8996: fmovm.x (%a0),&0x80 # LOAD INPUT
8997:
8998: mov.l (%a0),%d1 # D1 high word = sign/exponent word of X
8999: mov.w 4(%a0),%d1 # D1 low word = top 16 bits of the mantissa
9000: fmov.x %fp0,X(%a6) # keep a copy of X (TEXPBIG reads its sign)
9001: and.l &0x7FFFFFFF,%d1 # drop the sign: D1 is |X| in compact form
9002:
9003: cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9004: bge.b TWOOK1 # yes: go check the upper bound
9005: bra.w EXPBORS # no: |X| is tiny
9006:
9007: TWOOK1: # |X| >= 2**(-70); D1 is |X| in compact form
9008: cmp.l %d1,&0x400D80C0 # |X| > 16480?
9009: ble.b TWOMAIN # no: usual case
9010: bra.w EXPBORS # yes: |X| is too large
9011:
9012: TWOMAIN: # on entry FP0 is X
9013: #--USUAL CASE, 2^(-70) <= |X| <= 16480
9014:
9015: fmov.x %fp0,%fp1
9016: fmul.s &0x42800000,%fp1 # 64 * X
9017: fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9018: mov.l %d2,-(%sp) # save d2; expr restores it
9019: lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9020: fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9021: mov.l INT(%a6),%d1
9022: mov.l %d1,%d2
9023: and.l &0x3F,%d1 # D1 IS J
9024: asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9025: add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9026: asr.l &6,%d2 # d2 IS L, N = 64L + J
9027: mov.l %d2,%d1
9028: asr.l &1,%d1 # D1 IS M
9029: sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9030: add.l &0x3FFF,%d2 # add the exponent bias; expr stores this word into ADJFACT
9031:
9032: #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9033: #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9034: #--D2 IS M' + 0x3FFF, THE BIASED EXPONENT FOR ADJFACT = 2^(M').
9035: #--ON THE STACK SO FAR: THE SAVED D2. FP2/FP3 ARE PUSHED NEXT.
9036:
9037: fmovm.x &0x0c,-(%sp) # save fp2/fp3
9038:
9039: fmul.s &0x3C800000,%fp1 # (1/64)*N
9040: mov.l (%a1)+,FACT1(%a6) # copy the leading part of 2^(J/64) into FACT1
9041: mov.l (%a1)+,FACT1HI(%a6)
9042: mov.l (%a1)+,FACT1LOW(%a6)
9043: mov.w (%a1)+,FACT2(%a6) # first word of FACT2 from the table
9044:
9045: fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9046:
9047: mov.w (%a1)+,FACT2HI(%a6) # upper word of FACT2's high mantissa from the table
9048: clr.w FACT2HI+2(%a6) # rest of FACT2's mantissa is zero
9049: clr.l FACT2LOW(%a6)
9050: add.w %d1,FACT1(%a6) # add M to FACT1's exponent word: FACT1 := 2^M * FACT1
9051: fmul.x LOG2(%pc),%fp0 # FP0 IS R
9052: add.w %d1,FACT2(%a6) # add M to FACT2's exponent word: FACT2 := 2^M * FACT2
9053:
9054: bra.w expr
9055:
9056: EXPBORS: # |X| is outside the usual range: either tiny or too large
9057: #--D1 IS |X| IN COMPACT FORM. D0 HOLDS THE USER'S ROUND PREC,MODE.
9058: cmp.l %d1,&0x3FFF8000 # compare with the compact form of 1.0
9059: bgt.b TEXPBIG # |X| > 1: this is the too-large case
9060:
9061: #--|X| IS SMALL, RETURN 1 + X
9062:
9063: fmov.l %d0,%fpcr # restore users round prec,mode
9064: fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9065: bra t_pinx2
9066:
9067: TEXPBIG:
9068: #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9069: #--D0 STILL HOLDS THE USER'S ROUND PREC,MODE.
9070: mov.l X(%a6),%d1 # first longword of the saved X (carries the sign)
9071: cmp.l %d1,&0
9072: blt.b EXPNEG # X is negative
9073:
9074: bra t_ovfl2 # t_ovfl expects positive value
9075:
9076: EXPNEG: # X is large and negative
9077: bra t_unfl2 # t_unfl expects positive value
9078:
9079: global stwotoxd
9080: stwotoxd:
9081: #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9082:
9083: fmov.l %d0,%fpcr # set user's rounding mode/precision
9084: fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9085: mov.l (%a0),%d1 # first longword of X (carries the sign)
9086: or.l &0x00800001,%d1 # turn it into a tiny non-zero single-precision value
9087: fadd.s %d1,%fp0 # 1 + that tiny value stands in for 1 + X
9088: bra t_pinx2
9089:
9090: global stentox
9091: #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S. A0 POINTS TO X; D0 IS ROUND PRECISION,MODE.
9092: stentox:
9093: fmovm.x (%a0),&0x80 # LOAD INPUT
9094:
9095: mov.l (%a0),%d1 # D1 high word = sign/exponent word of X
9096: mov.w 4(%a0),%d1 # D1 low word = top 16 bits of the mantissa
9097: fmov.x %fp0,X(%a6) # keep a copy of X (TEXPBIG reads its sign)
9098: and.l &0x7FFFFFFF,%d1 # drop the sign: D1 is |X| in compact form
9099:
9100: cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9101: bge.b TENOK1 # yes: go check the upper bound
9102: bra.w EXPBORS # no: |X| is tiny
9103:
9104: TENOK1: # |X| >= 2**(-70); D1 is |X| in compact form
9105: cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9106: ble.b TENMAIN # yes: usual case
9107: bra.w EXPBORS # no: |X| is too large
9108:
9109: TENMAIN: # on entry FP0 is X
9110: #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9111:
9112: fmov.x %fp0,%fp1
9113: fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9114: fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9115: mov.l %d2,-(%sp) # save d2; expr restores it
9116: lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9117: fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9118: mov.l INT(%a6),%d1
9119: mov.l %d1,%d2
9120: and.l &0x3F,%d1 # D1 IS J
9121: asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9122: add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9123: asr.l &6,%d2 # d2 IS L, N = 64L + J
9124: mov.l %d2,%d1
9125: asr.l &1,%d1 # D1 IS M
9126: sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9127: add.l &0x3FFF,%d2 # add the exponent bias; expr stores this word into ADJFACT
9128:
9129: #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9130: #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9131: #--D2 IS M' + 0x3FFF, THE BIASED EXPONENT FOR ADJFACT = 2^(M').
9132: #--ON THE STACK SO FAR: THE SAVED D2. FP2/FP3 ARE PUSHED NEXT.
9133: fmovm.x &0x0c,-(%sp) # save fp2/fp3
9134:
9135: fmov.x %fp1,%fp2 # FP2 = N as well
9136:
9137: fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9138: mov.l (%a1)+,FACT1(%a6) # copy the leading part of 2^(J/64) into FACT1
9139:
9140: fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9141:
9142: mov.l (%a1)+,FACT1HI(%a6)
9143: mov.l (%a1)+,FACT1LOW(%a6)
9144: fsub.x %fp1,%fp0 # X - N L_LEAD
9145: mov.w (%a1)+,FACT2(%a6) # first word of FACT2 from the table
9146:
9147: fsub.x %fp2,%fp0 # (X - N L_LEAD) - N L_TRAIL
9148:
9149: mov.w (%a1)+,FACT2HI(%a6) # upper word of FACT2's high mantissa from the table
9150: clr.w FACT2HI+2(%a6) # rest of FACT2's mantissa is zero
9151: clr.l FACT2LOW(%a6)
9152:
9153: fmul.x LOG10(%pc),%fp0 # FP0 IS R
9154: add.w %d1,FACT1(%a6) # add M to FACT1's exponent word: FACT1 := 2^M * FACT1
9155: add.w %d1,FACT2(%a6) # same for FACT2; execution then falls through to expr
9156:
9157: expr: # shared tail of stwotox (TWOMAIN) and stentox (TENMAIN)
9158: #--D0 HOLDS THE USER'S ROUND PREC,MODE. THE STACK HOLDS THE SAVED D2, THEN FP2/FP3.
9159: #--D2 HOLDS THE BIASED EXPONENT OF 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9160: #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9161: #-- 2**(M'+M) * 2**(J/64) * EXP(R)
9162:
9163: fmov.x %fp0,%fp1
9164: fmul.x %fp1,%fp1 # FP1 IS S = R*R
9165:
9166: fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9167: fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9168:
9169: fmul.x %fp1,%fp2 # FP2 IS S*A5
9170: fmul.x %fp1,%fp3 # FP3 IS S*A4
9171:
9172: fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9173: fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9174:
9175: fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9176: fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9177:
9178: fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9179: fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9180:
9181: fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9182: fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9183: fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9184:
9185: fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9186:
9187: #--FINAL RECONSTRUCTION PROCESS
9188: #--FP0 := FACT1*(EXP(R)-1) + FACT2 + FACT1, THEN MULTIPLY BY ADJFACT = 2**(M')
9189:
9190: fmul.x FACT1(%a6),%fp0 # FACT1*(EXP(R)-1)
9191: fadd.x FACT2(%a6),%fp0 # ... + FACT2
9192: fadd.x FACT1(%a6),%fp0 # ... + FACT1
9193:
9194: fmov.l %d0,%fpcr # restore users round prec,mode
9195: mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT (ADJFACT aliases FACT1, which is no longer needed)
9196: mov.l (%sp)+,%d2 # restore the caller's d2
9197: mov.l &0x80000000,ADJFACT+4(%a6) # high mantissa = 0x80000000
9198: clr.l ADJFACT+8(%a6) # low mantissa = 0
9199: mov.b &FMUL_OP,%d1 # last inst is MUL
9200: fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9201: bra t_catch
9202:
9203: global stentoxd
9204: stentoxd:
9205: #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9206:
9207: fmov.l %d0,%fpcr # set user's rounding mode/precision
9208: fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9209: mov.l (%a0),%d1 # first longword of X (carries the sign)
9210: or.l &0x00800001,%d1 # turn it into a tiny non-zero single-precision value
9211: fadd.s %d1,%fp0 # 1 + that tiny value stands in for 1 + X
9212: bra t_pinx2
9213:
9214: #########################################################################
9215: # sscale(): computes the destination operand scaled by the source #
9216: # operand. If the absolute value of the source operand is #
9217: # >= 2^14, an overflow or underflow is returned. #
9218: # #
9219: # INPUT *************************************************************** #
9220: # a0 = pointer to double-extended source operand X #
9221: # a1 = pointer to double-extended destination operand Y #
9222: # #
9223: # OUTPUT ************************************************************** #
9224: # fp0 = scale(X,Y) #
9225: # #
9226: #########################################################################
9227:
9228: set SIGN, L_SCR1 # scratch byte that receives the dst sign
9229:
9230: global sscale
9231: sscale: # a0 = ptr to source X (the scale amount), a1 = ptr to destination Y
9232: mov.l %d0,-(%sp) # store off ctrl bits for now
9233:
9234: mov.w DST_EX(%a1),%d1 # get dst exponent
9235: smi.b SIGN(%a6) # use SIGN to hold dst sign
9236: andi.l &0x00007fff,%d1 # strip sign from dst exp
9237:
9238: mov.w SRC_EX(%a0),%d0 # check src bounds
9239: andi.w &0x7fff,%d0 # clr src sign bit
9240: cmpi.w %d0,&0x3fff # is src ~ ZERO? (biased exponent below 0x3fff means |src| < 1)
9241: blt.w src_small # yes
9242: cmpi.w %d0,&0x400c # no; is src too big? (biased exponent above 0x400c)
9243: bgt.w src_out # yes
9244:
9245: #
9246: # Source is within 2^14 range.
9247: #
9248: src_ok:
9249: fintrz.x SRC(%a0),%fp0 # calc int of src
9250: fmov.l %fp0,%d0 # int src to d0
9251: # don't want any accrued bits from the fintrz showing up later since
9252: # we may need to read the fpsr for the last fp op in t_catch2().
9253: fmov.l &0x0,%fpsr # clear the fpsr
9254:
9255: tst.b DST_HI(%a1) # is dst denormalized? (top mantissa bit clear)
9256: bmi.b sok_norm # no; otherwise fall through to sok_dnrm
9257:
9258: # the dst is a DENORM. normalize the DENORM and add the adjustment to
9259: # the src value. then, jump to the norm part of the routine.
9260: sok_dnrm:
9261: mov.l %d0,-(%sp) # save src for now
9262:
9263: mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9264: mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9265: mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9266:
9267: lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9268: bsr.l norm # normalize the DENORM (norm is defined elsewhere; presumably returns the shift count in d0)
9269: neg.l %d0 # negate norm's d0 result
9270: add.l (%sp)+,%d0 # add adjustment to src
9271:
9272: fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9273:
9274: cmpi.w %d0,&-0x3fff # is the shft amt really low?
9275: bge.b sok_norm2 # thank goodness no
9276:
9277: # the multiply factor that we're trying to create should be a denorm
9278: # for the multiply to work. therefore, we're going to actually do a
9279: # multiply with a denorm which will cause an unimplemented data type
9280: # exception to be put into the machine which will be caught and corrected
9281: # later. we don't do this with the DENORMs above because this method
9282: # is slower. but, don't fret, I don't see it being used much either.
9283: fmov.l (%sp)+,%fpcr # restore user fpcr
9284: mov.l &0x80000000,%d1 # load normalized mantissa
9285: subi.l &-0x3fff,%d0 # how many should we shift?
9286: neg.l %d0 # make it positive
9287: cmpi.b %d0,&0x20 # is it >= 32?
9288: bge.b sok_dnrm_32 # yes
9289: lsr.l %d0,%d1 # no; bit stays in upper lw
9290: clr.l -(%sp) # insert zero low mantissa
9291: mov.l %d1,-(%sp) # insert new high mantissa
9292: clr.l -(%sp) # make zero exponent
9293: bra.b sok_norm_cont
9294: sok_dnrm_32: # shift count is 32 or more: the single mantissa bit lands in the low longword
9295: subi.b &0x20,%d0 # get shift count
9296: lsr.l %d0,%d1 # make low mantissa longword
9297: mov.l %d1,-(%sp) # insert new low mantissa
9298: clr.l -(%sp) # insert zero high mantissa
9299: clr.l -(%sp) # make zero exponent
9300: bra.b sok_norm_cont
9301:
9302: # the src will force the dst to a DENORM value or worse. so, let's
9303: # create an fp multiply that will create the result.
9304: sok_norm:
9305: fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst
9306: sok_norm2: # also entered from sok_dnrm with fp0 already loaded
9307: fmov.l (%sp)+,%fpcr # restore user fpcr
9308:
9309: addi.w &0x3fff,%d0 # turn src amt into exp value
9310: swap %d0 # put exponent in high word
9311: clr.l -(%sp) # insert zero low mantissa (pushed first, so highest address)
9312: mov.l &0x80000000,-(%sp) # insert new high mantissa
9313: mov.l %d0,-(%sp) # insert new exponent (pushed last, so lowest address)
9314:
9315: sok_norm_cont: # the 12-byte multiplier is on top of the stack; fp0 holds the dst
9316: fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9317: mov.b &FMUL_OP,%d1 # last inst is MUL
9318: fmul.x (%sp)+,%fp0 # do the multiply
9319: bra t_catch2 # catch any exceptions
9320:
9321: #
9322: # Source is outside of 2^14 range. Test the sign and branch
9323: # to the appropriate exception handler.
9324: #
9325: src_out:
9326: mov.l (%sp)+,%d0 # restore ctrl bits
9327: exg %a0,%a1 # swap src,dst ptrs
9328: tst.b SRC_EX(%a1) # is src negative? (a1 points at the source after the swap)
9329: bmi t_unfl # yes; underflow
9330: bra t_ovfl_sc # no; overflow
9331:
9332: #
9333: # The source input is below 1, so we check for denormalized numbers
9334: # and set unfl.
9335: #
9336: src_small:
9337: tst.b DST_HI(%a1) # is dst denormalized? (top mantissa bit clear)
9338: bpl.b ssmall_done # yes
9339:
9340: mov.l (%sp)+,%d0 # pop the saved ctrl bits
9341: fmov.l %d0,%fpcr # no; load control bits
9342: mov.b &FMOV_OP,%d1 # last inst is MOVE
9343: fmov.x DST(%a1),%fp0 # simply return dest
9344: bra t_catch2
9345: ssmall_done: # src is below 1 and dst is a DENORM
9346: mov.l (%sp)+,%d0 # load control bits into d0
9347: mov.l %a1,%a0 # pass ptr to dst
9348: bra t_resdnrm
9349:
9350: #########################################################################
9351: # smod(): computes the fp MOD of the input values X,Y. #
9352: # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9353: # #
9354: # INPUT *************************************************************** #
9355: # a0 = pointer to extended precision input X #
9356: # a1 = pointer to extended precision input Y #
9357: # d0 = round precision,mode #
9358: # #
9359: # The input operands X and Y can be either normalized or #
9360: # denormalized. #
9361: # #
9362: # OUTPUT ************************************************************** #
9363: # fp0 = FREM(X,Y) or FMOD(X,Y) #
9364: # #
9365: # ALGORITHM *********************************************************** #
9366: # #
9367: # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9368: # signY := sign(Y), X := |X|, Y := |Y|, #
9369: # signQ := signX EOR signY. Record whether MOD or REM #
9370: # is requested. #
9371: # #
9372: # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9373: # If (L < 0) then #
9374: # R := X, go to Step 4. #
9375: # else #
9376: # R := 2^(-L)X, j := L. #
9377: # endif #
9378: # #
9379: # Step 3. Perform MOD(X,Y) #
9380: # 3.1 If R = Y, go to Step 9. #
9381: # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9382: # 3.3 If j = 0, go to Step 4. #
9383: # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9384: # Step 3.1. #
9385: # #
9386: # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9387: # Last_Subtract := false (used in Step 7 below). If #
9388: # MOD is requested, go to Step 6. #
9389: # #
9390: # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9391: # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9392: # Step 6. #
9393: # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9394: # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9395: # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9396: # then { Q := Q + 1, signX := -signX }. #
9397: # #
9398: # Step 6. R := signX*R. #
9399: # #
9400: # Step 7. If Last_Subtract = true, R := R - Y. #
9401: # #
9402: # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9403: # #
9404: # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9405: # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9406: # R := 0. Return signQ, last 7 bits of Q, and R. #
9407: # #
9408: #########################################################################
9409:
9410: set Mod_Flag,L_SCR3 # byte: 0 = MOD requested (smod), 1 = REM requested (srem)
9411: set Sc_Flag,L_SCR3+1 # byte: 1 = fp0 must be multiplied by Scale on exit
9412:
9413: set SignY,L_SCR2 # word: sign/exponent word of Y as passed in
9414: set SignX,L_SCR2+2 # word: sign/exponent word of X (bit 15 may be flipped in Tie_Case)
9415: set SignQ,L_SCR3+2 # word: $8000 if the signs of X and Y differ, else 0
9416:
9417: set Y,FP_SCR0 # scratch ext-precision image of |Y|: exponent word
9418: set Y_Hi,Y+4 # ...upper mantissa longword
9419: set Y_Lo,Y+8 # ...lower mantissa longword
9420:
9421: set R,FP_SCR1 # scratch ext-precision image of R: exponent word
9422: set R_Hi,R+4 # ...upper mantissa longword
9423: set R_Lo,R+8 # ...lower mantissa longword
9424:
9425: Scale: # ext-precision constant: exponent word $0001, mantissa $80000000_00000000
9426: long 0x00010000,0x80000000,0x00000000,0x00000000
9427:
9428: global smod # entry point: MOD of X (dst) by Y (src)
9429: smod:
9430: clr.b FPSR_QBYTE(%a6) # start with an empty quotient byte
9431: mov.l %d0,-(%sp) # save ctrl bits
9432: clr.b Mod_Flag(%a6) # Mod_Flag = 0: MOD requested
9433: bra.b Mod_Rem # join the code shared with srem
9434:
9435: global srem # entry point: IEEE REM of X (dst) by Y (src)
9436: srem:
9437: clr.b FPSR_QBYTE(%a6) # start with an empty quotient byte
9438: mov.l %d0,-(%sp) # save ctrl bits
9439: mov.b &0x1,Mod_Flag(%a6) # Mod_Flag = 1: REM requested; falls into Mod_Rem
9440:
9441: Mod_Rem:
9442: #..Save sign of X and Y
9443: movm.l &0x3f00,-(%sp) # save data registers
9444: mov.w SRC_EX(%a0),%d3 # sign/exponent word of Y (the source operand)
9445: mov.w %d3,SignY(%a6) # keep it for the sign(Q) computation in Chk_X
9446: and.l &0x00007FFF,%d3 # Y := |Y|
9447:
9448: #..fetch the 64-bit mantissa of Y
9449: mov.l SRC_HI(%a0),%d4
9450: mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9451:
9452: tst.l %d3 # exponent field of Y zero?
9453: bne.b Y_Normal # no; only the bias needs adjusting
9454:
9455: mov.l &0x00003FFE,%d3 # $3FFD + 1
9456: tst.l %d4 # upper mantissa longword empty?
9457: bne.b HiY_not0
9458:
9459: HiY_0:
9460: mov.l %d5,%d4 # shift the mantissa left by a whole longword
9461: clr.l %d5
9462: sub.l &32,%d3 # ...and account for those 32 bits in the exponent
9463: clr.l %d6
9464: bfffo %d4{&0:&32},%d6 # D6 = offset of the first set bit in D4
9465: lsl.l %d6,%d4 # move that bit up to bit 31
9466: sub.l %d6,%d3 # (D3,D4,D5) is normalized
9467: # ...with bias $7FFD
9468: bra.b Chk_X
9469:
9470: HiY_not0:
9471: clr.l %d6
9472: bfffo %d4{&0:&32},%d6 # D6 = offset of the first set bit in D4
9473: sub.l %d6,%d3 # lower the exponent by the shift count
9474: lsl.l %d6,%d4
9475: mov.l %d5,%d7 # a copy of D5
9476: lsl.l %d6,%d5
9477: neg.l %d6
9478: add.l &32,%d6 # D6 = 32 - shift count
9479: lsr.l %d6,%d7 # bits of D5 that cross into D4
9480: or.l %d7,%d4 # (D3,D4,D5) normalized
9481: # ...with bias $7FFD
9482: bra.b Chk_X
9483:
9484: Y_Normal:
9485: add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9486: # ...with bias $7FFD
9487:
9488: Chk_X:
9489: mov.w DST_EX(%a1),%d0 # sign/exponent word of X (the destination operand)
9490: mov.w %d0,SignX(%a6) # keep it for Fix_Sign
9491: mov.w SignY(%a6),%d1
9492: eor.l %d0,%d1 # bit 15 = sign(X) EOR sign(Y)
9493: and.l &0x00008000,%d1 # keep only that sign bit
9494: mov.w %d1,SignQ(%a6) # sign(Q) obtained
9495: and.l &0x00007FFF,%d0 # X := |X|
9496: mov.l DST_HI(%a1),%d1
9497: mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9498: tst.l %d0 # exponent field of X zero?
9499: bne.b X_Normal # no; only the bias needs adjusting
9500: mov.l &0x00003FFE,%d0 # same starting exponent as used for a zero-exponent Y
9501: tst.l %d1 # upper mantissa longword empty?
9502: bne.b HiX_not0
9503:
9504: HiX_0:
9505: mov.l %d2,%d1 # shift the mantissa left by a whole longword
9506: clr.l %d2
9507: sub.l &32,%d0 # ...and account for those 32 bits in the exponent
9508: clr.l %d6
9509: bfffo %d1{&0:&32},%d6 # D6 = offset of the first set bit in D1
9510: lsl.l %d6,%d1 # move that bit up to bit 31
9511: sub.l %d6,%d0 # (D0,D1,D2) is normalized
9512: # ...with bias $7FFD
9513: bra.b Init
9514:
9515: HiX_not0:
9516: clr.l %d6
9517: bfffo %d1{&0:&32},%d6 # D6 = offset of the first set bit in D1
9518: sub.l %d6,%d0 # lower the exponent by the shift count
9519: lsl.l %d6,%d1
9520: mov.l %d2,%d7 # a copy of D2
9521: lsl.l %d6,%d2
9522: neg.l %d6
9523: add.l &32,%d6 # D6 = 32 - shift count
9524: lsr.l %d6,%d7 # bits of D2 that cross into D1
9525: or.l %d7,%d1 # (D0,D1,D2) normalized
9526: # ...with bias $7FFD
9527: bra.b Init
9528:
9529: X_Normal:
9530: add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9531: # ...with bias $7FFD
9532:
9533: Init:
9534: #..set up the shift-and-subtract loop: D0 = j, D3 = Q, D6 = carry
9535: mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9536: mov.l %d0,-(%sp) # save biased exp(X)
9537: sub.l %d3,%d0 # L := expo(X)-expo(Y)
9538:
9539: clr.l %d6 # D6 := carry <- 0
9540: clr.l %d3 # D3 is Q
9541: mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9542:
9543: #..(Carry,D1,D2) is R
9544: tst.l %d0 # L < 0 ?
9545: bge.b Mod_Loop_pre # no; run the loop with j = L
9546:
9547: #..expo(X) < expo(Y). Thus X = mod(X,Y)
9548: #..(D0,D1,D2) still holds |X|, so it is used as R unchanged
9549: mov.l (%sp)+,%d0 # restore d0
9550: bra.w Get_Mod
9551:
9552: Mod_Loop_pre:
9553: addq.l &0x4,%sp # erase exp(X)
9554: #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9555: Mod_Loop:
9556: tst.l %d6 # test carry bit
9557: bgt.b R_GT_Y # D6 = $FF (> 0) when the last doubling carried out
9558:
9559: #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9560: cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9561: bne.b R_NE_Y
9562: cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9563: bne.b R_NE_Y
9564:
9565: #..At this point, R = Y
9566: bra.w Rem_is_0
9567:
9568: R_NE_Y:
9569: #..use the borrow of the previous compare
9570: bcs.b R_LT_Y # borrow is set iff R < Y
9571:
9572: R_GT_Y:
9573: #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9574: #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9575: sub.l %d5,%d2 # lo(R) - lo(Y)
9576: subx.l %d4,%d1 # hi(R) - hi(Y)
9577: clr.l %d6 # clear carry
9578: addq.l &1,%d3 # Q := Q + 1
9579:
9580: R_LT_Y:
9581: #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9582: tst.l %d0 # see if j = 0.
9583: beq.b PostLoop
9584:
9585: add.l %d3,%d3 # Q := 2Q
9586: add.l %d2,%d2 # lo(R) = 2lo(R)
9587: roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9588: scs %d6 # set Carry if 2(R) overflows
9589: addq.l &1,%a1 # k := k+1
9590: subq.l &1,%d0 # j := j - 1
9591: #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9592:
9593: bra.b Mod_Loop
9594:
9595: PostLoop:
9596: #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9597:
9598: #..normalize R.
9599: mov.l L_SCR1(%a6),%d0 # new biased expo of R
9600: tst.l %d1 # upper mantissa longword of R empty?
9601: bne.b HiR_not0
9602:
9603: HiR_0:
9604: mov.l %d2,%d1 # shift the mantissa left by a whole longword
9605: clr.l %d2
9606: sub.l &32,%d0 # ...and account for those 32 bits in the exponent
9607: clr.l %d6
9608: bfffo %d1{&0:&32},%d6 # D6 = offset of the first set bit in D1
9609: lsl.l %d6,%d1 # move that bit up to bit 31
9610: sub.l %d6,%d0 # (D0,D1,D2) is normalized
9611: # ...with bias $7FFD
9612: bra.b Get_Mod
9613:
9614: HiR_not0:
9615: clr.l %d6
9616: bfffo %d1{&0:&32},%d6 # D6 = offset of the first set bit in D1
9617: bmi.b Get_Mod # already normalized
9618: sub.l %d6,%d0 # lower the exponent by the shift count
9619: lsl.l %d6,%d1
9620: mov.l %d2,%d7 # a copy of D2
9621: lsl.l %d6,%d2
9622: neg.l %d6
9623: add.l &32,%d6 # D6 = 32 - shift count
9624: lsr.l %d6,%d7 # bits of D2 that cross into D1
9625: or.l %d7,%d1 # (D0,D1,D2) normalized
9626:
9627: #
9628: Get_Mod:
9629: cmp.l %d0,&0x000041FE # is the biased expo of R >= $41FE?
9630: bge.b No_Scale # yes; the extra $3FFE bias can be removed now
9631: Do_Scale:
9632: mov.w %d0,R(%a6) # store R with the extra bias still applied
9633: mov.l %d1,R_Hi(%a6)
9634: mov.l %d2,R_Lo(%a6)
9635: mov.l L_SCR1(%a6),%d6 # biased expo(Y), same extra bias
9636: mov.w %d6,Y(%a6)
9637: mov.l %d4,Y_Hi(%a6)
9638: mov.l %d5,Y_Lo(%a6)
9639: fmov.x R(%a6),%fp0 # no exception
9640: mov.b &1,Sc_Flag(%a6) # Restore must multiply fp0 by Scale
9641: bra.b ModOrRem
9642: No_Scale:
9643: mov.l %d1,R_Hi(%a6)
9644: mov.l %d2,R_Lo(%a6)
9645: sub.l &0x3FFE,%d0 # remove the extra bias from expo(R)
9646: mov.w %d0,R(%a6)
9647: mov.l L_SCR1(%a6),%d6
9648: sub.l &0x3FFE,%d6 # remove the extra bias from expo(Y)
9649: mov.l %d6,L_SCR1(%a6) # ModOrRem re-reads expo(Y) from here
9650: fmov.x R(%a6),%fp0 # fp0 = |R|
9651: mov.w %d6,Y(%a6)
9652: mov.l %d4,Y_Hi(%a6)
9653: mov.l %d5,Y_Lo(%a6)
9654: clr.b Sc_Flag(%a6) # no final scaling needed
9655:
9656: #
9657: ModOrRem:
9658: tst.b Mod_Flag(%a6) # 0 = MOD, nonzero = REM
9659: beq.b Fix_Sign # MOD: R is already the answer
9660:
9661: mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
9662: subq.l &1,%d6 # biased expo(Y/2)
9663: cmp.l %d0,%d6 # compare expo(R) with expo(Y/2)
9664: blt.b Fix_Sign # smaller exponent: R < Y/2
9665: bgt.b Last_Sub # larger exponent: R > Y/2
9666:
9667: cmp.l %d1,%d4 # equal exponents: compare the mantissas, hi first
9668: bne.b Not_EQ
9669: cmp.l %d2,%d5 # then lo
9670: bne.b Not_EQ
9671: bra.w Tie_Case # R = Y/2 exactly
9672:
9673: Not_EQ:
9674: bcs.b Fix_Sign # borrow from the compare: R < Y/2
9675:
9676: Last_Sub:
9677: #..R > Y/2: subtract Y once more
9678: fsub.x Y(%a6),%fp0 # no exceptions
9679: addq.l &1,%d3 # Q := Q + 1
9680:
9681: #
9682: Fix_Sign:
9683: #..Get sign of X
9684: mov.w SignX(%a6),%d6 # N flag = sign bit of X
9685: bge.b Get_Q # X non-negative: keep fp0 as is
9686: fneg.x %fp0 # X negative: negate the result
9687:
9688: #..Get Q
9689: #
9690: Get_Q:
9691: clr.l %d6
9692: mov.w SignQ(%a6),%d6 # D6 is sign(Q)
9693: mov.l &8,%d7
9694: lsr.l %d7,%d6 # move the sign from bit 15 down to bit 7
9695: and.l &0x0000007F,%d3 # 7 bits of Q
9696: or.l %d6,%d3 # sign and bits of Q
9697: # swap %d3
9698: # fmov.l %fpsr,%d6
9699: # and.l &0xFF00FFFF,%d6
9700: # or.l %d3,%d6
9701: # fmov.l %d6,%fpsr # put Q in fpsr
9702: mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
9703:
9704: #
9705: Restore:
9706: movm.l (%sp)+,&0xfc # {%d2-%d7}
9707: mov.l (%sp)+,%d0 # ctrl bits pushed at the smod/srem entry
9708: fmov.l %d0,%fpcr
9709: tst.b Sc_Flag(%a6) # was the extra bias left in fp0?
9710: beq.b Finish # no
9711: mov.b &FMUL_OP,%d1 # last inst is MUL
9712: fmul.x Scale(%pc),%fp0 # may cause underflow
9713: bra t_catch2
9714: # the '040 package did this apparently to see if the dst operand for the
9715: # preceding fmul was a denorm. but, it better not have been since the
9716: # algorithm just got done playing with fp0 and expected no exceptions
9717: # as a result. trust me...
9718: # bra t_avoid_unsupp # check for denorm as a
9719: # ;result of the scaling
9720:
9721: Finish:
9722: mov.b &FMOV_OP,%d1 # last inst is MOVE
9723: fmov.x %fp0,%fp0 # capture exceptions & round
9724: bra t_catch2
9725:
9726: Rem_is_0:
9727: #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
9728: addq.l &1,%d3 # Q := Q + 1
9729: cmp.l %d0,&8 # D0 is j
9730: bge.b Q_Big # j >= 8: every bit kept by Get_Q is zero
9731:
9732: lsl.l %d0,%d3 # Q := 2^j * (Q+1)
9733: bra.b Set_R_0
9734:
9735: Q_Big:
9736: clr.l %d3 # low 7 bits of the quotient are all zero
9737:
9738: Set_R_0:
9739: fmov.s &0x00000000,%fp0 # R := +0; Fix_Sign negates it if X is negative
9740: clr.b Sc_Flag(%a6) # nothing to rescale
9741: bra.w Fix_Sign
9742:
9743: Tie_Case:
9744: #..Check parity of Q
9745: mov.l %d3,%d6
9746: and.l &0x00000001,%d6 # isolate the low bit of Q
9747: tst.l %d6
9748: beq.w Fix_Sign # Q is even
9749:
9750: #..Q is odd, Q := Q + 1, signX := -signX
9751: addq.l &1,%d3
9752: mov.w SignX(%a6),%d6
9753: eor.l &0x00008000,%d6 # flip the saved sign bit of X
9754: mov.w %d6,SignX(%a6) # Fix_Sign now applies the opposite sign to R
9755: bra.w Fix_Sign
9756:
9757: #########################################################################
9758: # XDEF **************************************************************** #
9759: # tag(): return the optype of the input ext fp number #
9760: # #
9761: # This routine is used by the 060FPLSP. #
9762: # #
9763: # XREF **************************************************************** #
9764: # None #
9765: # #
9766: # INPUT *************************************************************** #
9767: # a0 = pointer to extended precision operand #
9768: # #
9769: # OUTPUT ************************************************************** #
9770: # d0 = value of type tag #
9771: # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
9772: # #
9773: # ALGORITHM *********************************************************** #
9774: # Simply test the exponent, j-bit, and mantissa values to #
9775: # determine the type of operand. #
9776: # If it's an unnormalized zero, alter the operand and force it #
9777: # to be a normal zero. #
9778: # #
9779: #########################################################################
9780:
9781: global tag
9782: tag:
9783: mov.w FTEMP_EX(%a0), %d0 # extract exponent
9784: andi.w &0x7fff, %d0 # strip off sign
9785: cmpi.w %d0, &0x7fff # is (EXP == MAX)?
9786: beq.b inf_or_nan_x
9787: not_inf_or_nan_x:
9788: btst &0x7,FTEMP_HI(%a0) # is the top (integer) bit of the mantissa set?
9789: beq.b not_norm_x # no
9790: is_norm_x:
9791: mov.b &NORM, %d0
9792: rts
9793: not_norm_x:
9794: tst.w %d0 # is exponent = 0?
9795: bne.b is_unnorm_x # no; nonzero exponent with the top bit clear
9796: not_unnorm_x:
9797: tst.l FTEMP_HI(%a0) # exponent is 0: any mantissa bit set?
9798: bne.b is_denorm_x
9799: tst.l FTEMP_LO(%a0)
9800: bne.b is_denorm_x
9801: is_zero_x:
9802: mov.b &ZERO, %d0
9803: rts
9804: is_denorm_x:
9805: mov.b &DENORM, %d0
9806: rts
9807: is_unnorm_x:
9808: bsr.l unnorm_fix # convert to norm,denorm,or zero
9809: rts # d0 is not written again after unnorm_fix returns
9810: is_unnorm_reg_x: # NOTE(review): no branch to this label is visible in this section
9811: mov.b &UNNORM, %d0
9812: rts
9813: inf_or_nan_x:
9814: tst.l FTEMP_LO(%a0) # any low mantissa bit set means NaN
9815: bne.b is_nan_x
9816: mov.l FTEMP_HI(%a0), %d0
9817: and.l &0x7fffffff, %d0 # msb is a don't care!
9818: bne.b is_nan_x
9819: is_inf_x:
9820: mov.b &INF, %d0
9821: rts
9822: is_nan_x:
9823: mov.b &QNAN, %d0 # every NaN is tagged QNAN here; SNAN is never returned
9824: rts
9825:
9826: #############################################################
9827:
9828: qnan: long 0x7fff0000, 0xffffffff, 0xffffffff # ext-precision QNAN image; t_operr loads it into fp0
9829:
9830: #########################################################################
9831: # XDEF **************************************************************** #
9832: # t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
9833: # t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
9834: # #
9835: # These routines are used by the 060FPLSP package. #
9836: # #
9837: # XREF **************************************************************** #
9838: # None #
9839: # #
9840: # INPUT *************************************************************** #
9841: # a0 = pointer to extended precision source operand. #
9842: # #
9843: # OUTPUT ************************************************************** #
9844: # fp0 = default DZ result. #
9845: # #
9846: # ALGORITHM *********************************************************** #
9847: # Transcendental emulation for the 060FPLSP has detected that #
9848: # a DZ exception should occur for the instruction. If DZ is disabled, #
9849: # return the default result. #
9850: # If DZ is enabled, the dst operand should be returned unscathed #
9851: # in fp0 while fp1 is used to create a DZ exception so that the #
9852: # operating system can log that such an event occurred. #
9853: # #
9854: #########################################################################
9855:
9856: global t_dz
9857: t_dz:
9858: tst.b SRC_EX(%a0) # check sign for neg or pos
9859: bpl.b dz_pinf # branch if pos sign
9860: # negative source falls through into t_dz2
9861: global t_dz2
9862: t_dz2:
9863: ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
9864:
9865: btst &dz_bit,FPCR_ENABLE(%a6) # is the DZ exception enabled?
9866: bne.b dz_minf_ena # yes
9867:
9868: # dz is disabled. return a -INF.
9869: fmov.s &0xff800000,%fp0 # return -INF
9870: rts
9871:
9872: # dz is enabled. create a dz exception so the user can record it
9873: # but use fp1 instead. return the dst operand unscathed in fp0.
9874: dz_minf_ena:
9875: fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9876: fmov.l USER_FPCR(%a6),%fpcr # run the divide under the user's control bits
9877: fmov.s &0xbf800000,%fp1 # load -1
9878: fdiv.s &0x00000000,%fp1 # -1 / 0
9879: rts
9880:
9881: dz_pinf:
9882: ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
9883:
9884: btst &dz_bit,FPCR_ENABLE(%a6) # is the DZ exception enabled?
9885: bne.b dz_pinf_ena # yes
9886:
9887: # dz is disabled. return a +INF.
9888: fmov.s &0x7f800000,%fp0 # return +INF
9889: rts
9890:
9891: # dz is enabled. create a dz exception so the user can record it
9892: # but use fp1 instead. return the dst operand unscathed in fp0.
9893: dz_pinf_ena:
9894: fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9895: fmov.l USER_FPCR(%a6),%fpcr # run the divide under the user's control bits
9896: fmov.s &0x3f800000,%fp1 # load +1
9897: fdiv.s &0x00000000,%fp1 # +1 / 0
9898: rts
9899:
9900: #########################################################################
9901: # XDEF **************************************************************** #
9902: # t_operr(): Handle 060FPLSP OPERR exception during emulation. #
9903: # #
9904: # This routine is used by the 060FPLSP package. #
9905: # #
9906: # XREF **************************************************************** #
9907: # None. #
9908: # #
9909: # INPUT *************************************************************** #
9910: # fp1 = source operand #
9911: # #
9912: # OUTPUT ************************************************************** #
9913: # fp0 = default result #
9914: # fp1 = unchanged #
9915: # #
9916: # ALGORITHM *********************************************************** #
9917: # An operand error should occur as the result of transcendental #
9918: # emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
9919: # in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
9920: # and the source operand in fp1. Use fp2 to create an OPERR exception #
9921: # so that the operating system can log the event. #
9922: # #
9923: #########################################################################
9924:
9925: global t_operr
9926: t_operr:
9927: ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP
9928:
9929: btst &operr_bit,FPCR_ENABLE(%a6) # is the OPERR exception enabled?
9930: bne.b operr_ena # yes
9931:
9932: # operr is disabled. return a QNAN in fp0
9933: fmovm.x qnan(%pc),&0x80 # return QNAN
9934: rts
9935:
9936: # operr is enabled. create an operr exception so the user can record it
9937: # but use fp2 instead. return the dst operand unscathed in fp0.
9938: operr_ena:
9939: fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9940: fmov.l USER_FPCR(%a6),%fpcr # run the multiply under the user's control bits
9941: fmovm.x &0x04,-(%sp) # save fp2
9942: fmov.s &0x7f800000,%fp2 # load +INF
9943: fmul.s &0x00000000,%fp2 # +INF x 0
9944: fmovm.x (%sp)+,&0x20 # restore fp2
9945: rts
9946:
9947: pls_huge: # exponent $7ffe, all mantissa bits set, sign clear
9948: long 0x7ffe0000,0xffffffff,0xffffffff
9949: mns_huge: # same magnitude as pls_huge with the sign bit set
9950: long 0xfffe0000,0xffffffff,0xffffffff
9951: pls_tiny: # exponent $0000, only the top mantissa bit set, sign clear
9952: long 0x00000000,0x80000000,0x00000000
9953: mns_tiny: # same magnitude as pls_tiny with the sign bit set
9954: long 0x80000000,0x80000000,0x00000000
9955:
9956: #########################################################################
9957: # XDEF **************************************************************** #
9958: # t_unfl(): Handle 060FPLSP underflow exception during emulation. #
9959: # t_unfl2(): Handle 060FPLSP underflow exception during #
9960: # emulation. result always positive. #
9961: # #
9962: # This routine is used by the 060FPLSP package. #
9963: # #
9964: # XREF **************************************************************** #
9965: # None. #
9966: # #
9967: # INPUT *************************************************************** #
9968: # a0 = pointer to extended precision source operand #
9969: # #
9970: # OUTPUT ************************************************************** #
9971: # fp0 = default underflow result #
9972: # #
9973: # ALGORITHM *********************************************************** #
9974: # An underflow should occur as the result of transcendental #
9975: # emulation in the 060FPLSP. Create an underflow by using "fmul" #
9976: # and two very small numbers of appropriate sign so the operating #
9977: # system can log the event. #
9978: # #
9979: #########################################################################
9980:
9981: global t_unfl
9982: t_unfl:
9983: tst.b SRC_EX(%a0) # sign of the source operand
9984: bpl.b unf_pos # positive: share the t_unfl2 path
9985: # negative source: the default result is a negative underflow
9986: ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX
9987:
9988: fmov.l USER_FPCR(%a6),%fpcr # use the user's rounding and enables
9989: fmovm.x mns_tiny(%pc),&0x80 # fp0 = negative tiny value
9990: fmul.x pls_tiny(%pc),%fp0 # (-tiny) x (+tiny) creates the underflow
9991:
9992: fmov.l %fpsr,%d0
9993: rol.l &0x8,%d0 # bring the FPSR ccode byte into d0.b
9994: mov.b %d0,FPSR_CC(%a6)
9995: rts
9996: global t_unfl2 # result always positive: enter on the positive path
9997: t_unfl2:
9998: unf_pos:
9999: ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX
10000:
10001: fmov.l USER_FPCR(%a6),%fpcr # use the user's rounding and enables
10002: fmovm.x pls_tiny(%pc),&0x80 # fp0 = positive tiny value
10003: fmul.x %fp0,%fp0 # (+tiny) x (+tiny) creates the underflow
10004:
10005: fmov.l %fpsr,%d0
10006: rol.l &0x8,%d0 # bring the FPSR ccode byte into d0.b
10007: mov.b %d0,FPSR_CC(%a6)
10008: rts
10009:
10010: #########################################################################
10011: # XDEF **************************************************************** #
10012: # t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
10013: # (monadic) #
10014: # t_ovfl2(): Handle 060FPLSP overflow exception during #
10015: # emulation. result always positive. (dyadic) #
10016: # t_ovfl_sc(): Handle 060FPLSP overflow exception during #
10017: # emulation for "fscale". #
10018: # #
10019: # This routine is used by the 060FPLSP package. #
10020: # #
10021: # XREF **************************************************************** #
10022: # None. #
10023: # #
10024: # INPUT *************************************************************** #
10025: # a0 = pointer to extended precision source operand #
10026: # #
10027: # OUTPUT ************************************************************** #
10028: # fp0 = default overflow result #
10029: # #
10030: # ALGORITHM *********************************************************** #
10031: # An overflow should occur as the result of transcendental #
10032: # emulation in the 060FPLSP. Create an overflow by using "fmul" #
10033: # and two very large numbers of appropriate sign so the operating #
10034: # system can log the event. #
10035: # For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
10036: # #
10037: #########################################################################
10038:
10039: global t_ovfl_sc
10040: t_ovfl_sc:
10041: ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10042:
10043: mov.b %d0,%d1 # fetch rnd prec,mode
10044: andi.b &0xc0,%d1 # extract prec
10045: beq.w ovfl_work # precision bits are zero: skip the INEX2 check
10046:
10047: # dst op is a DENORM. we have to normalize the mantissa to see if the
10048: # result would be inexact for the given precision. make a copy of the
10049: # dst so we don't screw up the version passed to us.
10050: mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10051: mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10052: mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10053: lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10054: movm.l &0xc080,-(%sp) # save d0-d1/a0
10055: bsr.l norm # normalize mantissa
10056: movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10057:
10058: cmpi.b %d1,&0x40 # is precision sgl?
10059: bne.b ovfl_sc_dbl # no; dbl
10060: ovfl_sc_sgl:
10061: tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10062: bne.b ovfl_sc_inx # yes
10063: tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10064: bne.b ovfl_sc_inx # yes
10065: bra.w ovfl_work # don't set INEX2
10066: ovfl_sc_dbl:
10067: mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10068: andi.l &0x7ff,%d1 # dbl mantissa set?
10069: beq.w ovfl_work # no; don't set INEX2
10070: ovfl_sc_inx:
10071: ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10072: bra.b ovfl_work # continue
10073:
10074: global t_ovfl
10075: t_ovfl:
10076: ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10077: ovfl_work: # t_ovfl_sc joins here with its own FPSR bits already set
10078: tst.b SRC_EX(%a0) # sign of the source operand
10079: bpl.b ovfl_p # positive
10080: ovfl_m:
10081: fmov.l USER_FPCR(%a6),%fpcr # use the user's rounding and enables
10082: fmovm.x mns_huge(%pc),&0x80 # fp0 = negative huge value
10083: fmul.x pls_huge(%pc),%fp0 # (-huge) x (+huge) creates the overflow
10084:
10085: fmov.l %fpsr,%d0
10086: rol.l &0x8,%d0 # bring the FPSR ccode byte into d0.b
10087: ori.b &neg_bmask,%d0 # force 'N'; byte-sized mask for a byte op
10088: mov.b %d0,FPSR_CC(%a6)
10089: rts
10090: ovfl_p:
10091: fmov.l USER_FPCR(%a6),%fpcr # use the user's rounding and enables
10092: fmovm.x pls_huge(%pc),&0x80 # fp0 = positive huge value
10093: fmul.x pls_huge(%pc),%fp0 # (+huge) x (+huge) creates the overflow
10094:
10095: fmov.l %fpsr,%d0
10096: rol.l &0x8,%d0 # bring the FPSR ccode byte into d0.b
10097: mov.b %d0,FPSR_CC(%a6)
10098: rts
10099:
10100: global t_ovfl2
10101: t_ovfl2:
10102: ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10103: fmov.l USER_FPCR(%a6),%fpcr # use the user's rounding and enables
10104: fmovm.x pls_huge(%pc),&0x80 # fp0 = positive huge value; no sign test here
10105: fmul.x pls_huge(%pc),%fp0 # (+huge) x (+huge) creates the overflow
10106:
10107: fmov.l %fpsr,%d0
10108: rol.l &0x8,%d0 # bring the FPSR ccode byte into d0.b
10109: mov.b %d0,FPSR_CC(%a6)
10110: rts
10111:
10112: #########################################################################
10113: # XDEF **************************************************************** #
10114: # t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10115: # emulation. #
10116: # t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10117: # emulation. #
10118: # #
10119: # These routines are used by the 060FPLSP package. #
10120: # #
10121: # XREF **************************************************************** #
10122: # None. #
10123: # #
10124: # INPUT *************************************************************** #
10125: # fp0 = default underflow or overflow result #
10126: # #
10127: # OUTPUT ************************************************************** #
10128: # fp0 = default result #
10129: # #
10130: # ALGORITHM *********************************************************** #
10131: # If an overflow or underflow occurred during the last #
10132: # instruction of transcendental 060FPLSP emulation, then it has already #
10133: # occurred and has been logged. Now we need to see if an inexact #
10134: # exception should occur. #
10135: # #
10136: #########################################################################
10137:
10138: global t_catch2
10139: t_catch2:
10140: fmov.l %fpsr,%d0 # status left by the last FP instruction
10141: or.l %d0,USER_FPSR(%a6) # merge it into the saved user FPSR
10142: bra.b inx2_work # skip the sign/zero tests; only check the inexact enable
10143:
10144: global t_catch
10145: t_catch:
10146: fmov.l %fpsr,%d0 # status left by the last FP instruction
10147: or.l %d0,USER_FPSR(%a6) # merge it, then fall through into t_inx2
10148:
10149: #########################################################################
10150: # XDEF **************************************************************** #
10151: # t_inx2(): Handle inexact 060FPLSP exception during emulation. #
10152: # t_pinx2(): Handle inexact 060FPLSP exception for "+" results. #
10153: # t_minx2(): Handle inexact 060FPLSP exception for "-" results. #
10154: # #
10155: # XREF **************************************************************** #
10156: # None. #
10157: # #
10158: # INPUT *************************************************************** #
10159: # fp0 = default result #
10160: # #
10161: # OUTPUT ************************************************************** #
10162: # fp0 = default result #
10163: # #
10164: # ALGORITHM *********************************************************** #
10165: # The last instruction of transcendental emulation for the #
10166: # 060FPLSP should be inexact. So, if inexact is enabled, then we create #
10167: # the event here by adding a large and very small number together #
10168: # so that the operating system can log the event. #
10169: # Must check, too, if the result was zero, in which case we just #
10170: # set the FPSR bits and return. #
10171: # #
10172: #########################################################################
10173:
10174: global t_inx2
10175: t_inx2:
10176: fblt.w t_minx2 # FPU ccodes say "less than": negative result
10177: fbeq.w inx2_zero # FPU ccodes say "equal": zero result
10178: # otherwise fall through into t_pinx2
10179: global t_pinx2
10180: t_pinx2:
10181: ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10182: bra.b inx2_work
10183:
10184: global t_minx2
10185: t_minx2:
10186: ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10187:
10188: inx2_work:
10189: btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
10190: bne.b inx2_work_ena # yes
10191: rts
10192: inx2_work_ena:
10193: fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions
10194: fmov.s &0x3f800000,%fp1 # load +1
10195: fadd.x pls_tiny(%pc),%fp1 # cause exception
10196: rts
10197:
10198: inx2_zero:
10199: mov.b &z_bmask,FPSR_CC(%a6) # ccodes = 'Z' only
10200: ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX
10201: rts
10202:
10203: #########################################################################
10204: # XDEF **************************************************************** #
10205: # t_extdnrm(): Handle DENORM inputs in 060FPLSP. #
10206: # t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". #
10207: # #
10208: # This routine is used by the 060FPLSP package. #
10209: # #
10210: # XREF **************************************************************** #
10211: # None. #
10212: # #
10213: # INPUT *************************************************************** #
10214: # a0 = pointer to extended precision input operand #
10215: # #
10216: # OUTPUT ************************************************************** #
10217: # fp0 = default result #
10218: # #
10219: # ALGORITHM *********************************************************** #
10220: # For all functions that have a denormalized input and that #
10221: # f(x)=x, this is the entry point. #
10222: # DENORM value is moved using "fmove" which triggers an exception #
10223: # if enabled so the operating system can log the event. #
10224: # #
10225: #########################################################################
10226:
10227: global t_extdnrm
10228: t_extdnrm:
10229: fmov.l USER_FPCR(%a6),%fpcr # move under the user's rounding and enables
10230: fmov.x SRC_EX(%a0),%fp0 # fp0 = the input operand
10231: fmov.l %fpsr,%d0 # status produced by the move
10232: ori.l &unfinx_mask,%d0 # add the underflow/inexact mask bits unconditionally
10233: or.l %d0,USER_FPSR(%a6) # merge into the saved user FPSR
10234: rts
10235:
10236: global t_resdnrm
10237: t_resdnrm:
10238: fmov.l USER_FPCR(%a6),%fpcr # move under the user's rounding and enables
10239: fmov.x SRC_EX(%a0),%fp0 # fp0 = the input operand
10240: fmov.l %fpsr,%d0 # status produced by the move
10241: or.l %d0,USER_FPSR(%a6) # merge only what the move itself reported
10242: rts
10243:
10244: ##########################################
10245:
10246: #
10247: # sto_cos:
10248: # This is used by fsincos library emulation. The correct
10249: # values are already in fp0 and fp1 so we do nothing here.
10250: #
10251: global sto_cos
10252: sto_cos:
10253: rts # nothing to store; return immediately
10254:
10255: ##########################################
10256:
10257: #
10258: # dst_qnan --- force result when destination is a NaN
10259: #
10260: global dst_qnan
10261: dst_qnan:
10262: fmov.x DST(%a1),%fp0 # fp0 = the dst operand
10263: tst.b DST_EX(%a1) # look at the sign bit of dst
10264: bpl.b dst_qnan_p # sign clear: positive case
10265: dst_qnan_m:
10266: mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # ccodes = 'N' + NAN
10267: rts
10268: dst_qnan_p:
10269: mov.b &nan_bmask,FPSR_CC(%a6) # ccodes = NAN only
10270: rts
10271:
10272: #
10273: # src_qnan --- force result when source is a NaN
10274: #
10275: global src_qnan
10276: src_qnan:
10277: fmov.x SRC(%a0),%fp0 # fp0 = the src operand
10278: tst.b SRC_EX(%a0) # look at the sign bit of src
10279: bpl.b src_qnan_p # sign clear: positive case
10280: src_qnan_m:
10281: mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # ccodes = 'N' + NAN
10282: rts
10283: src_qnan_p:
10284: mov.b &nan_bmask,FPSR_CC(%a6) # ccodes = NAN only
10285: rts
10286:
10287: ##########################################
10288:
10289: #
10290: # Native instruction support
10291: #
10292: # Some systems may need entry points even for 68060 native
10293: # instructions. These routines are provided for
10294: # convenience.
10295: #
10296: global _fadds_ # fp0 = sgl at 4(%sp) + sgl at 8(%sp), offsets as seen on entry
10297: _fadds_:
10298: fmov.l %fpcr,-(%sp) # save fpcr
10299: fmov.l &0x00000000,%fpcr # clear fpcr for load
10300: fmov.s 0x8(%sp),%fp0 # load sgl dst
10301: fmov.l (%sp)+,%fpcr # restore fpcr
10302: fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src
10303: rts
10304:
10305: global _faddd_ # fp0 = dbl at 4(%sp) + dbl at 0xc(%sp), offsets as seen on entry
10306: _faddd_:
10307: fmov.l %fpcr,-(%sp) # save fpcr
10308: fmov.l &0x00000000,%fpcr # clear fpcr for load
10309: fmov.d 0x8(%sp),%fp0 # load dbl dst
10310: fmov.l (%sp)+,%fpcr # restore fpcr
10311: fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src
10312: rts
10313:
10314: global _faddx_ # fp0 = ext at 4(%sp) + ext at 0x10(%sp)
10315: _faddx_:
10316: fmovm.x 0x4(%sp),&0x80 # load ext dst
10317: fadd.x 0x10(%sp),%fp0 # fadd w/ ext src
10318: rts
10319:
10320: global _fsubs_ # fp0 = sgl at 4(%sp) - sgl at 8(%sp), offsets as seen on entry
10321: _fsubs_:
10322: fmov.l %fpcr,-(%sp) # save fpcr
10323: fmov.l &0x00000000,%fpcr # clear fpcr for load
10324: fmov.s 0x8(%sp),%fp0 # load sgl dst
10325: fmov.l (%sp)+,%fpcr # restore fpcr
10326: fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src
10327: rts
10328:
10329: global _fsubd_ # fp0 = dbl at 4(%sp) - dbl at 0xc(%sp), offsets as seen on entry
10330: _fsubd_:
10331: fmov.l %fpcr,-(%sp) # save fpcr
10332: fmov.l &0x00000000,%fpcr # clear fpcr for load
10333: fmov.d 0x8(%sp),%fp0 # load dbl dst
10334: fmov.l (%sp)+,%fpcr # restore fpcr
10335: fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src
10336: rts
10337:
10338: global _fsubx_ # fp0 = ext at 4(%sp) - ext at 0x10(%sp)
10339: _fsubx_:
10340: fmovm.x 0x4(%sp),&0x80 # load ext dst
10341: fsub.x 0x10(%sp),%fp0 # fsub w/ ext src
10342: rts
10343:
10344: global _fmuls_ # fp0 = sgl at 4(%sp) * sgl at 8(%sp), offsets as seen on entry
10345: _fmuls_:
10346: fmov.l %fpcr,-(%sp) # save fpcr
10347: fmov.l &0x00000000,%fpcr # clear fpcr for load
10348: fmov.s 0x8(%sp),%fp0 # load sgl dst
10349: fmov.l (%sp)+,%fpcr # restore fpcr
10350: fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src
10351: rts
10352:
10353: global _fmuld_ # fp0 = dbl at 4(%sp) * dbl at 0xc(%sp), offsets as seen on entry
10354: _fmuld_:
10355: fmov.l %fpcr,-(%sp) # save fpcr
10356: fmov.l &0x00000000,%fpcr # clear fpcr for load
10357: fmov.d 0x8(%sp),%fp0 # load dbl dst
10358: fmov.l (%sp)+,%fpcr # restore fpcr
10359: fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src
10360: rts
10361:
10362: global _fmulx_ # fp0 = ext at 4(%sp) * ext at 0x10(%sp)
10363: _fmulx_:
10364: fmovm.x 0x4(%sp),&0x80 # load ext dst
10365: fmul.x 0x10(%sp),%fp0 # fmul w/ ext src
10366: rts
10367:
10368: global _fdivs_ # fp0 = sgl at 4(%sp) / sgl at 8(%sp), offsets as seen on entry
10369: _fdivs_:
10370: fmov.l %fpcr,-(%sp) # save fpcr
10371: fmov.l &0x00000000,%fpcr # clear fpcr for load
10372: fmov.s 0x8(%sp),%fp0 # load sgl dst
10373: fmov.l (%sp)+,%fpcr # restore fpcr
10374: fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src
10375: rts
10376:
10377: global _fdivd_ # fp0 = dbl at 4(%sp) / dbl at 0xc(%sp), offsets as seen on entry
10378: _fdivd_:
10379: fmov.l %fpcr,-(%sp) # save fpcr
10380: fmov.l &0x00000000,%fpcr # clear fpcr for load
10381: fmov.d 0x8(%sp),%fp0 # load dbl dst
10382: fmov.l (%sp)+,%fpcr # restore fpcr
10383: fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src
10384: rts
10385:
10386: global _fdivx_ # fp0 = ext at 4(%sp) / ext at 0x10(%sp)
10387: _fdivx_:
10388: fmovm.x 0x4(%sp),&0x80 # load ext dst
10389: fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src
10390: rts
10391:
10392: global _fabss_ # fp0 = fabs of the sgl operand at 4(%sp)
10393: _fabss_:
10394: fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src
10395: rts
10396:
10397: global _fabsd_ # fp0 = fabs of the dbl operand at 4(%sp)
10398: _fabsd_:
10399: fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src
10400: rts
10401:
10402: global _fabsx_ # fp0 = fabs of the ext operand at 4(%sp)
10403: _fabsx_:
10404: fabs.x 0x4(%sp),%fp0 # fabs w/ ext src
10405: rts
10406:
10407: global _fnegs_ # fp0 = fneg of the sgl operand at 4(%sp)
10408: _fnegs_:
10409: fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src
10410: rts
10411:
10412: global _fnegd_ # fp0 = fneg of the dbl operand at 4(%sp)
10413: _fnegd_:
10414: fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src
10415: rts
10416:
10417: global _fnegx_ # fp0 = fneg of the ext operand at 4(%sp)
10418: _fnegx_:
10419: fneg.x 0x4(%sp),%fp0 # fneg w/ ext src
10420: rts
10421:
10422: global _fsqrts_
10423: _fsqrts_:
10424: fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src
10425: rts
10426:
10427: global _fsqrtd_
10428: _fsqrtd_: # stub: fp0 = square root of the dbl operand on the stack
10429: fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src; the operand sits just above the return address popped by rts
10430: rts # result is left in fp0
10431:
10432: global _fsqrtx_
10433: _fsqrtx_: # stub: fp0 = square root of the ext operand on the stack
10434: fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src; the operand sits just above the return address popped by rts
10435: rts # result is left in fp0
10436:
10437: global _fints_
10438: _fints_: # stub: fp0 = fint of the sgl operand on the stack
10439: fint.s 0x4(%sp),%fp0 # fint w/ sgl src; the operand sits just above the return address popped by rts
10440: rts # result is left in fp0
10441:
10442: global _fintd_
10443: _fintd_: # stub: fp0 = fint of the dbl operand on the stack
10444: fint.d 0x4(%sp),%fp0 # fint w/ dbl src; the operand sits just above the return address popped by rts
10445: rts # result is left in fp0
10446:
10447: global _fintx_
10448: _fintx_: # stub: fp0 = fint of the ext operand on the stack
10449: fint.x 0x4(%sp),%fp0 # fint w/ ext src; the operand sits just above the return address popped by rts
10450: rts # result is left in fp0
10451:
10452: global _fintrzs_
10453: _fintrzs_: # stub: fp0 = fintrz of the sgl operand on the stack
10454: fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src; the operand sits just above the return address popped by rts
10455: rts # result is left in fp0
10456:
10457: global _fintrzd_
10458: _fintrzd_: # stub: fp0 = fintrz of the dbl operand on the stack
10459: fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src; the operand sits just above the return address popped by rts
10460: rts # result is left in fp0
10461:
10462: global _fintrzx_
10463: _fintrzx_: # stub: fp0 = fintrz of the ext operand on the stack
10464: fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src; the operand sits just above the return address popped by rts
10465: rts # result is left in fp0
10466:
10467: ########################################################################
10468:
10469: #########################################################################
10470: # src_zero(): Return signed zero according to sign of src operand. #
10471: #########################################################################
10472: global src_zero
10473: src_zero: # a0 points at the src operand; there is no rts here, see the fall-through below
10474: tst.b SRC_EX(%a0) # get sign of src operand (bit 7 of the byte at SRC_EX sets N)
10475: bmi.b ld_mzero # if neg, load neg zero; otherwise fall through into ld_pzero, which follows directly
10476:
10477: #
10478: # ld_pzero(): return a positive zero in fp0 and record it in FPSR_CC(%a6).
10479: # src_zero() falls through into this routine when the src sign is clear.
10480: global ld_pzero
10481: ld_pzero:
10482: fmov.s &0x00000000,%fp0 # load +0 (single-precision bit pattern)
10483: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit; the whole byte is overwritten with z_bmask
10484: rts
10485:
10486: # ld_mzero(): return a negative zero in fp0 and record it in FPSR_CC(%a6).
10487: global ld_mzero
10488: ld_mzero:
10489: fmov.s &0x80000000,%fp0 # load -0 (single-precision bit pattern, sign bit set)
10490: mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits; the whole byte is overwritten
10491: rts
10492:
10493: #########################################################################
10494: # dst_zero(): Return signed zero according to sign of dst operand. #
10495: #########################################################################
10496: global dst_zero
10497: dst_zero: # a1 points at the dst operand
10498: tst.b DST_EX(%a1) # get sign of dst operand (bit 7 of the byte at DST_EX sets N)
10499: bmi.b ld_mzero # if neg, load neg zero
10500: bra.b ld_pzero # load positive zero; explicit branch because ld_pzero does not follow this routine
10501:
10502: #########################################################################
10503: # src_inf(): Return signed inf according to sign of src operand. #
10504: #########################################################################
10505: global src_inf
10506: src_inf: # a0 points at the src operand; there is no rts here, see the fall-through below
10507: tst.b SRC_EX(%a0) # get sign of src operand (bit 7 of the byte at SRC_EX sets N)
10508: bmi.b ld_minf # if negative branch; otherwise fall through into ld_pinf, which follows directly
10509:
10510: #
10511: # ld_pinf(): return a positive infinity in fp0 and record it in FPSR_CC(%a6).
10512: # src_inf() falls through into this routine when the src sign is clear.
10513: global ld_pinf
10514: ld_pinf:
10515: fmov.s &0x7f800000,%fp0 # load +INF (single-precision bit pattern)
10516: mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit; the whole byte is overwritten with inf_bmask
10517: rts
10518:
10519: #
10520: # ld_minf(): return a negative infinity in fp0 and record it in FPSR_CC(%a6).
10521: #
10522: global ld_minf
10523: ld_minf:
10524: fmov.s &0xff800000,%fp0 # load -INF (single-precision bit pattern, sign bit set)
10525: mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits; the whole byte is overwritten
10526: rts
10527:
10528: #########################################################################
10529: # dst_inf(): Return signed inf according to sign of dst operand. #
10530: #########################################################################
10531: global dst_inf
10532: dst_inf: # a1 points at the dst operand
10533: tst.b DST_EX(%a1) # get sign of dst operand (bit 7 of the byte at DST_EX sets N)
10534: bmi.b ld_minf # if negative branch
10535: bra.b ld_pinf # otherwise return +INF
10536:
10537: global szr_inf
10538: #################################################################
10539: # szr_inf(): Return +ZERO for a negative src operand or #
10540: # +INF for a positive src operand. #
10541: # Routine used for fetox, ftwotox, and ftentox. #
10542: #################################################################
10543: szr_inf: # a0 points at the src operand
10544: tst.b SRC_EX(%a0) # check sign of source
10545: bmi.b ld_pzero # negative src: result is +0 (note: positive zero, not signed)
10546: bra.b ld_pinf # positive src: result is +INF
10547:
10548: #########################################################################
10549: # sopr_inf(): Return +INF for a positive src operand or #
10550: # jump to operand error routine for a negative src operand. #
10551: # Routine used for flogn, flognp1, flog10, and flog2. #
10552: #########################################################################
10553: global sopr_inf
10554: sopr_inf: # a0 points at the src operand
10555: tst.b SRC_EX(%a0) # check sign of source
10556: bmi.w t_operr # negative src: operand error (t_operr is defined outside this section)
10557: bra.b ld_pinf # positive src: result is +INF
10558:
10559: #################################################################
10560: # setoxm1i(): Return minus one for a negative src operand or #
10561: # positive infinity for a positive src operand. #
10562: # Routine used for fetoxm1. #
10563: #################################################################
10564: global setoxm1i
10565: setoxm1i: # a0 points at the src operand
10566: tst.b SRC_EX(%a0) # check sign of source
10567: bmi.b ld_mone # negative src: result is -1
10568: bra.b ld_pinf # positive src: result is +INF
10569:
10570: #########################################################################
10571: # src_one(): Return signed one according to sign of src operand. #
10572: #########################################################################
10573: global src_one
10574: src_one: # a0 points at the src operand; there is no rts here, see the fall-through below
10575: tst.b SRC_EX(%a0) # check sign of source
10576: bmi.b ld_mone # negative src: result is -1; otherwise fall through into ld_pone, which follows directly
10577:
10578: #
10579: # ld_pone(): return positive one in fp0 and clear FPSR_CC(%a6).
10580: # src_one() falls through into this routine when the src sign is clear.
10581: global ld_pone
10582: ld_pone:
10583: fmov.s &0x3f800000,%fp0 # load +1 (single-precision bit pattern)
10584: clr.b FPSR_CC(%a6) # no ccode bits are set for +1
10585: rts
10586:
10587: #
10588: # ld_mone(): return negative one in fp0 and record it in FPSR_CC(%a6).
10589: #
10590: global ld_mone
10591: ld_mone:
10592: fmov.s &0xbf800000,%fp0 # load -1 (single-precision bit pattern, sign bit set)
10593: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit; the whole byte is overwritten with neg_bmask
10594: rts
10595:
10596: ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 # +pi/2 as three longwords (12 bytes); loaded with fmov.x by ld_ppi2
10597: mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 # -pi/2: same longwords as ppiby2 except the top bit of the first one is set; loaded by ld_mpi2
10598:
10599: #################################################################
10600: # spi_2(): Return signed PI/2 according to sign of src operand. #
10601: #################################################################
10602: global spi_2
10603: spi_2: # a0 points at the src operand; there is no rts here, see the fall-through below
10604: tst.b SRC_EX(%a0) # check sign of source
10605: bmi.b ld_mpi2 # negative src: return -pi/2; otherwise fall through into ld_ppi2, which follows directly
10606:
10607: #
10608: # ld_ppi2(): return positive PI/2.
10609: # Ends by branching to t_pinx2 (defined outside this section), not by rts.
10610: global ld_ppi2
10611: ld_ppi2:
10612: fmov.l %d0,%fpcr # fpcr = d0 (presumably the caller's rounding mode/precision; TODO confirm)
10613: fmov.x ppiby2(%pc),%fp0 # load +pi/2
10614: bra.w t_pinx2 # set INEX2
10615:
10616: #
10617: # ld_mpi2(): return negative PI/2.
10618: # Ends by branching to t_minx2 (defined outside this section), not by rts.
10619: global ld_mpi2
10620: ld_mpi2:
10621: fmov.l %d0,%fpcr # fpcr = d0 (presumably the caller's rounding mode/precision; TODO confirm)
10622: fmov.x mpiby2(%pc),%fp0 # load -pi/2
10623: bra.w t_minx2 # set INEX2
10624:
10625: ####################################################
10626: # The following routines give support for fsincos. #
10627: ####################################################
10628:
10629: #
10630: # ssincosz(): When the src operand is ZERO, store a one in the
10631: # cosine register (fp1) and return a ZERO in fp0 w/ the same sign
10632: # as the src operand. FPSR_CC(%a6) describes the fp0 (sine) result.
10633: #
10634: global ssincosz
10635: ssincosz:
10636: fmov.s &0x3f800000,%fp1 # cosine result: fp1 = +1 (single-precision bit pattern)
10637: tst.b SRC_EX(%a0) # test sign
10638: bpl.b sincoszp # positive src: take the +0 path below
10639: fmov.s &0x80000000,%fp0 # return sin result in fp0 (-0)
10640: mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits for -0
10641: rts
10642: sincoszp:
10643: fmov.s &0x00000000,%fp0 # return sin result in fp0 (+0)
10644: mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit for +0
10645: rts
10646:
10647: #
10648: # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10649: # register (fp1) and jump to the operand error routine. The jump is
10650: # unconditional: the sign of the src operand is not tested here.
10651: #
10652: global ssincosi
10653: ssincosi:
10654: fmov.x qnan(%pc),%fp1 # load NAN (constant qnan is defined outside this section)
10655: bra.w t_operr # operand error for either sign of INF
10656:
10657: #
10658: # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10659: # register (fp1) and branch to the src QNAN routine.
10660: #
10661: global ssincosqnan
10662: ssincosqnan:
10663: fmov.x LOCAL_EX(%a0),%fp1 # fp1 = the extended operand that a0 points at
10664: bra.w src_qnan # src_qnan (defined outside this section) finishes the fp0 result
10665:
10666: ########################################################################
10667:
10668: global smod_sdnrm
10669: global smod_snorm
10670: smod_sdnrm: # both entry points share one body
10671: smod_snorm: # dispatch on the dst tag byte DTAG(%a6)
10672: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10673: beq.l smod # tag == 0 (presumably NORM; confirm against the tag definitions): go to smod
10674: cmpi.b %d1,&ZERO
10675: beq.w smod_zro # dst tagged ZERO: signed-zero result, see smod_zro
10676: cmpi.b %d1,&INF
10677: beq.l t_operr # dst tagged INF: operand error
10678: cmpi.b %d1,&DENORM
10679: beq.l smod # dst tagged DENORM: also handled by smod
10680: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10681:
10682: global smod_szero
10683: smod_szero: # dispatch on the dst tag byte DTAG(%a6); all four tested tags give an operand error
10684: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10685: beq.l t_operr # tag == 0 (presumably NORM; confirm against the tag definitions)
10686: cmpi.b %d1,&ZERO
10687: beq.l t_operr # dst tagged ZERO
10688: cmpi.b %d1,&INF
10689: beq.l t_operr # dst tagged INF
10690: cmpi.b %d1,&DENORM
10691: beq.l t_operr # dst tagged DENORM
10692: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10693:
10694: global smod_sinf
10695: smod_sinf: # dispatch on the dst tag byte DTAG(%a6)
10696: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10697: beq.l smod_fpn # tag == 0 (presumably NORM; confirm against the tag definitions): result is the dst itself
10698: cmpi.b %d1,&ZERO
10699: beq.l smod_zro # dst tagged ZERO: signed-zero result, see smod_zro
10700: cmpi.b %d1,&INF
10701: beq.l t_operr # dst tagged INF: operand error
10702: cmpi.b %d1,&DENORM
10703: beq.l smod_fpn # dst tagged DENORM: smod_fpn takes its denorm path
10704: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10705:
10706: smod_zro: # shared by the fmod and frem dispatchers for a ZERO dst
10707: srem_zro: # stores the quotient sign in FPSR_QBYTE(%a6) and returns a zero with the dst's sign
10708: mov.b SRC_EX(%a0),%d1 # get src sign
10709: mov.b DST_EX(%a1),%d0 # get dst sign
10710: eor.b %d0,%d1 # get qbyte sign (src sign XOR dst sign, in bit 7)
10711: andi.b &0x80,%d1 # keep only the sign bit; the other quotient bits are stored as 0
10712: mov.b %d1,FPSR_QBYTE(%a6)
10713: tst.b %d0 # d0 still holds the dst sign byte
10714: bpl.w ld_pzero # positive dst: return +0
10715: bra.w ld_mzero # negative dst: return -0
10716:
10717: smod_fpn: # shared by the fmod and frem dispatchers; the result is the dst operand itself
10718: srem_fpn: # stores the quotient sign in FPSR_QBYTE(%a6), then returns dst in fp0 or hands a denorm to t_resdnrm
10719: clr.b FPSR_QBYTE(%a6) # NOTE(review): this clear is overwritten by the mov.b to FPSR_QBYTE below
10720: mov.l %d0,-(%sp) # save d0; its low byte is used as scratch for the dst sign
10721: mov.b SRC_EX(%a0),%d1 # get src sign
10722: mov.b DST_EX(%a1),%d0 # get dst sign
10723: eor.b %d0,%d1 # get qbyte sign (src sign XOR dst sign, in bit 7)
10724: andi.b &0x80,%d1 # keep only the sign bit; the other quotient bits are stored as 0
10725: mov.b %d1,FPSR_QBYTE(%a6)
10726: cmpi.b DTAG(%a6),&DENORM
10727: bne.b smod_nrm # dst is not tagged DENORM: load it directly
10728: lea DST(%a1),%a0 # a0 = address of the dst operand for t_resdnrm
10729: mov.l (%sp)+,%d0 # restore the saved d0
10730: bra t_resdnrm # denorm result handling lives outside this section
10731: smod_nrm:
10732: fmov.l (%sp)+,%fpcr # pop the saved d0 value straight into fpcr (d0 itself is not restored on this path)
10733: fmov.x DST(%a1),%fp0 # fp0 = dst operand, loaded under that fpcr
10734: tst.b DST_EX(%a1) # check the dst sign
10735: bmi.b smod_nrm_neg
10736: rts # positive dst: FPSR_CC(%a6) is not written here
10737:
10738: smod_nrm_neg:
10739: mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code
10740: rts
10741:
10742: #########################################################################
10743: global srem_snorm
10744: global srem_sdnrm
10745: srem_sdnrm: # both entry points share one body
10746: srem_snorm: # dispatch on the dst tag byte DTAG(%a6)
10747: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10748: beq.l srem # tag == 0 (presumably NORM; confirm against the tag definitions): go to srem
10749: cmpi.b %d1,&ZERO
10750: beq.w srem_zro # dst tagged ZERO: signed-zero result, see srem_zro
10751: cmpi.b %d1,&INF
10752: beq.l t_operr # dst tagged INF: operand error
10753: cmpi.b %d1,&DENORM
10754: beq.l srem # dst tagged DENORM: also handled by srem
10755: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10756:
10757: global srem_szero
10758: srem_szero: # dispatch on the dst tag byte DTAG(%a6); all four tested tags give an operand error
10759: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10760: beq.l t_operr # tag == 0 (presumably NORM; confirm against the tag definitions)
10761: cmpi.b %d1,&ZERO
10762: beq.l t_operr # dst tagged ZERO
10763: cmpi.b %d1,&INF
10764: beq.l t_operr # dst tagged INF
10765: cmpi.b %d1,&DENORM
10766: beq.l t_operr # dst tagged DENORM
10767: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10768:
10769: global srem_sinf
10770: srem_sinf: # dispatch on the dst tag byte DTAG(%a6)
10771: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10772: beq.w srem_fpn # tag == 0 (presumably NORM; confirm against the tag definitions): result is the dst itself
10773: cmpi.b %d1,&ZERO
10774: beq.w srem_zro # dst tagged ZERO: signed-zero result, see srem_zro
10775: cmpi.b %d1,&INF
10776: beq.l t_operr # dst tagged INF: operand error
10777: cmpi.b %d1,&DENORM
10778: beq.l srem_fpn # dst tagged DENORM: srem_fpn takes its denorm path
10779: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10780:
10781: #########################################################################
10782:
10783: global sscale_snorm
10784: global sscale_sdnrm
10785: sscale_snorm: # both entry points share one body
10786: sscale_sdnrm: # dispatch on the dst tag byte DTAG(%a6)
10787: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10788: beq.l sscale # tag == 0 (presumably NORM; confirm against the tag definitions): go to sscale
10789: cmpi.b %d1,&ZERO
10790: beq.l dst_zero # dst tagged ZERO: return a zero with the dst's sign
10791: cmpi.b %d1,&INF
10792: beq.l dst_inf # dst tagged INF: return an infinity with the dst's sign
10793: cmpi.b %d1,&DENORM
10794: beq.l sscale # dst tagged DENORM: also handled by sscale
10795: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10796:
10797: global sscale_szero
10798: sscale_szero: # dispatch on the dst tag byte DTAG(%a6); same instruction sequence as sscale_snorm
10799: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10800: beq.l sscale # tag == 0 (presumably NORM; confirm against the tag definitions): go to sscale
10801: cmpi.b %d1,&ZERO
10802: beq.l dst_zero # dst tagged ZERO: return a zero with the dst's sign
10803: cmpi.b %d1,&INF
10804: beq.l dst_inf # dst tagged INF: return an infinity with the dst's sign
10805: cmpi.b %d1,&DENORM
10806: beq.l sscale # dst tagged DENORM: also handled by sscale
10807: bra.l dst_qnan # every tag value not matched above ends up in dst_qnan
10808:
10809: global sscale_sinf
10810: sscale_sinf: # only a dst tagged QNAN avoids the operand error
10811: mov.b DTAG(%a6),%d1 # d1 = dst tag; the move itself sets Z when the tag is 0
10812: beq.l t_operr # tag == 0: operand error (same target as the final bra.l below)
10813: cmpi.b %d1,&QNAN
10814: beq.l dst_qnan # dst tagged QNAN: handled by dst_qnan
10815: bra.l t_operr # every other tag value: operand error
10816:
10817: ########################################################################
10818:
10819: global sop_sqnan
10820: sop_sqnan: # choose between the dst and src QNAN routines based on the dst tag
10821: mov.b DTAG(%a6),%d1 # d1 = dst tag
10822: cmpi.b %d1,&QNAN
10823: beq.l dst_qnan # dst tagged QNAN: dst_qnan is used
10824: bra.l src_qnan # any other dst tag: src_qnan is used
10825:
10826: #########################################################################
10827: # norm(): normalize the mantissa of an extended precision input. the #
10828: # input operand should not be normalized already. #
10829: # #
10830: # XDEF **************************************************************** #
10831: # norm() #
10832: # #
10833: # XREF **************************************************************** #
10834: # none #
10835: # #
10836: # INPUT *************************************************************** #
10837: # a0 = pointer fp extended precision operand to normalize #
10838: # #
10839: # OUTPUT ************************************************************** #
10840: # d0 = number of bit positions the mantissa was shifted #
10841: # a0 = the input operand's mantissa is normalized; the exponent #
10842: # is unchanged. #
10843: # d1 is overwritten and not restored; d2 and d3 are preserved. #
10844: #########################################################################
10845: global norm
10846: norm:
10847: mov.l %d2, -(%sp) # create some temp regs
10848: mov.l %d3, -(%sp) # d3 is saved on both paths although only norm_hi uses it
10849:
10850: mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
10851: mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
10852:
10853: bfffo %d0{&0:&32}, %d2 # how many places to shift? d2 = offset of the first set bit in hi(man)
10854: beq.b norm_lo # hi(man) is all zeroes!
10855:
10856: norm_hi: # hi(man) has a set bit: shift the 64-bit mantissa left by d2
10857: lsl.l %d2, %d0 # left shift hi(man)
10858: bfextu %d1{&0:%d2}, %d3 # extract lo bits: the top d2 bits of lo(man); NOTE(review): a register width of 0 presumably means 32 for bfextu, which would explain the "not normalized already" requirement in the header; confirm
10859:
10860: or.l %d3, %d0 # create hi(man)
10861: lsl.l %d2, %d1 # create lo(man)
10862:
10863: mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
10864: mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
10865:
10866: mov.l %d2, %d0 # return shift amount
10867:
10868: mov.l (%sp)+, %d3 # restore temp regs
10869: mov.l (%sp)+, %d2
10870:
10871: rts
10872:
10873: norm_lo: # hi(man) was zero: lo(man) becomes the new hi(man)
10874: bfffo %d1{&0:&32}, %d2 # how many places to shift?
10875: lsl.l %d2, %d1 # shift lo(man)
10876: add.l &32, %d2 # add 32 to shft amount (for the skipped all-zero hi longword)
10877:
10878: mov.l %d1, FTEMP_HI(%a0) # store hi(man)
10879: clr.l FTEMP_LO(%a0) # lo(man) is now zero
10880:
10881: mov.l %d2, %d0 # return shift amount
10882:
10883: mov.l (%sp)+, %d3 # restore temp regs
10884: mov.l (%sp)+, %d2
10885:
10886: rts
10887:
10888: #########################################################################
10889: # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
10890: # - returns corresponding optype tag #
10891: # #
10892: # XDEF **************************************************************** #
10893: # unnorm_fix() #
10894: # #
10895: # XREF **************************************************************** #
10896: # norm() - normalize the mantissa #
10897: # #
10898: # INPUT *************************************************************** #
10899: # a0 = pointer to unnormalized extended precision number #
10900: # #
10901: # OUTPUT ************************************************************** #
CVSweb