blob: 4aedef973cf6fbd02cc67ea65bcbcede0da21c68 [file] [log] [blame]
Kyle Swenson8d8f6542021-03-15 11:02:55 -06001~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3M68000 Hi-Performance Microprocessor Division
4M68060 Software Package
5Production Release P1.00 -- October 10, 1994
6
7M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10To the maximum extent permitted by applicable law,
11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13and any warranty against infringement with regard to the SOFTWARE
14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16To the maximum extent permitted by applicable law,
17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24so long as this entire notice is retained without alteration in any modified and/or
25redistributed versions, and that such modified versions are clearly identified as such.
26No licenses are granted by implication, estoppel or otherwise under any patents
27or trademarks of Motorola, Inc.
28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29# freal.s:
30# This file is appended to the top of the 060FPSP package
31# and contains the entry points into the package. The user, in
32# effect, branches to one of the branch table entries located
33# after _060FPSP_TABLE.
34# Also, subroutine stubs exist in this file (_fpsp_done for
35# example) that are referenced by the FPSP package itself in order
36# to call a given routine. The stub routine actually performs the
37# callout. The FPSP code does a "bsr" to the stub routine. This
38# extra layer of hierarchy adds a slight performance penalty but
39# it makes the FPSP code easier to read and more maintainable.
40#
41
# Byte offsets of the callout slots. The stubs later in this file read the
# longword at (_060FPSP_TABLE-0x80 + offset) and add it to the address
# _060FPSP_TABLE-0x80 to obtain the routine to jump to.
# Slots are 4 bytes apart; 0x34-0x3f are not named here.
42set _off_bsun, 0x00 # slot used by _real_bsun
43set _off_snan, 0x04 # slot used by _real_snan
44set _off_operr, 0x08 # slot used by _real_operr
45set _off_ovfl, 0x0c # slot used by _real_ovfl
46set _off_unfl, 0x10 # slot used by _real_unfl
47set _off_dz, 0x14 # slot used by _real_dz
48set _off_inex, 0x18 # slot used by _real_inex
49set _off_fline, 0x1c # slot used by _real_fline
50set _off_fpu_dis, 0x20 # slot used by _real_fpu_disabled
51set _off_trap, 0x24 # slot used by _real_trap
52set _off_trace, 0x28 # slot used by _real_trace
53set _off_access, 0x2c # slot used by _real_access
54set _off_done, 0x30 # slot used by _fpsp_done
55
# Memory-access callouts start at 0x40.
56set _off_imr, 0x40 # slot used by _imem_read
57set _off_dmr, 0x44 # slot used by _dmem_read
58set _off_dmw, 0x48 # slot used by _dmem_write
59set _off_irw, 0x4c # slot used by _imem_read_word
60set _off_irl, 0x50 # slot used by _imem_read_long
61set _off_drb, 0x54 # slot used by _dmem_read_byte
62set _off_drw, 0x58 # slot used by _dmem_read_word
63set _off_drl, 0x5c # slot used by _dmem_read_long
64set _off_dwb, 0x60 # slot used by _dmem_write_byte
65set _off_dww, 0x64 # slot used by _dmem_write_word
66set _off_dwl, 0x68 # slot used by _dmem_write_long
67
# _060FPSP_TABLE: first label of the package. The callout stubs in this file
# address the callout slots relative to this label (at _060FPSP_TABLE-0x80).
68_060FPSP_TABLE:
69
70###############################################################
71
72# Here's the table of ENTRY POINTS for those linking the package.
# Each of the nine entries is a long branch to a handler followed by one
# zero word of padding. The order of the entries is part of the interface.
73 bra.l _fpsp_snan
74 short 0x0000
75 bra.l _fpsp_operr
76 short 0x0000
77 bra.l _fpsp_ovfl
78 short 0x0000
79 bra.l _fpsp_unfl
80 short 0x0000
81 bra.l _fpsp_dz
82 short 0x0000
83 bra.l _fpsp_inex
84 short 0x0000
85 bra.l _fpsp_fline
86 short 0x0000
87 bra.l _fpsp_unsupp
88 short 0x0000
89 bra.l _fpsp_effadd
90 short 0x0000
91
# Reserve 56 further bytes after the entries.
# NOTE(review): presumably this rounds the table up to 0x80 bytes
# (9 entries * 8 bytes + 56), assuming bra.l assembles to 6 bytes - confirm.
92 space 56
93
94###############################################################
# _fpsp_done: callout stub used by the handlers when all work is done.
# Reads the longword in slot _off_done, adds it to the address
# _060FPSP_TABLE-0x80 and jumps there. %d0 is preserved and the stack
# pointer is the same at the target as it was on entry to the stub.
95 global _fpsp_done
96_fpsp_done:
97 mov.l %d0,-(%sp) # park d0; it is needed as scratch
98 mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = value stored in the done slot
99 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (table base + d0) as the jump target
100 mov.l 0x4(%sp),%d0 # recover the parked d0
101 rtd &0x4 # pop target into the PC, then drop the parked-d0 slot
102
# _real_ovfl: callout stub taken by _fpsp_ovfl when the overflow exception
# is enabled. Jumps to (_060FPSP_TABLE-0x80) + the longword in slot
# _off_ovfl, with %d0 and the stack pointer as they were on entry.
103 global _real_ovfl
104_real_ovfl:
105 mov.l %d0,-(%sp) # save d0 on the stack
106 mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # fetch the ovfl slot contents
107 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = base + slot value
108 mov.l 0x4(%sp),%d0 # restore d0 from below the pushed target
109 rtd &0x4 # "return" to the target and discard the saved d0
110
# _real_unfl: callout stub for slot _off_unfl. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot; %d0 and the
# stack pointer reach the target unchanged.
111 global _real_unfl
112_real_unfl:
113 mov.l %d0,-(%sp) # free up d0 as scratch
114 mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = unfl slot contents
115 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # target = base + d0, pushed for rtd
116 mov.l 0x4(%sp),%d0 # put the caller's d0 back
117 rtd &0x4 # jump via the pushed address; pop the d0 save slot
118
# _real_inex: callout stub taken by _fpsp_ovfl when overflow is disabled but
# inexact is enabled. Jumps to (_060FPSP_TABLE-0x80) + the longword in slot
# _off_inex; %d0 and the stack pointer are preserved.
119 global _real_inex
120_real_inex:
121 mov.l %d0,-(%sp) # stash d0
122 mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # load the inex slot
123 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + slot value
124 mov.l 0x4(%sp),%d0 # unstash d0
125 rtd &0x4 # transfer to the pushed address, remove the stash
126
# _real_bsun: callout stub for slot _off_bsun. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
127 global _real_bsun
128_real_bsun:
129 mov.l %d0,-(%sp) # keep a copy of d0
130 mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = bsun slot contents
131 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push the computed target
132 mov.l 0x4(%sp),%d0 # reload d0 from the copy
133 rtd &0x4 # go to target; release the 4-byte copy
134
# _real_operr: callout stub for slot _off_operr. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
135 global _real_operr
136_real_operr:
137 mov.l %d0,-(%sp) # save d0
138 mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = operr slot contents
139 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
140 mov.l 0x4(%sp),%d0 # restore d0
141 rtd &0x4 # jump to pushed address and pop the save slot
142
# _real_snan: callout stub for slot _off_snan. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
143 global _real_snan
144_real_snan:
145 mov.l %d0,-(%sp) # save d0
146 mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = snan slot contents
147 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
148 mov.l 0x4(%sp),%d0 # restore d0
149 rtd &0x4 # jump to pushed address and pop the save slot
150
# _real_dz: callout stub for slot _off_dz. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
151 global _real_dz
152_real_dz:
153 mov.l %d0,-(%sp) # save d0
154 mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = dz slot contents
155 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
156 mov.l 0x4(%sp),%d0 # restore d0
157 rtd &0x4 # jump to pushed address and pop the save slot
158
# _real_fline: callout stub for slot _off_fline. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
159 global _real_fline
160_real_fline:
161 mov.l %d0,-(%sp) # save d0
162 mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = fline slot contents
163 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
164 mov.l 0x4(%sp),%d0 # restore d0
165 rtd &0x4 # jump to pushed address and pop the save slot
166
# _real_fpu_disabled: callout stub for slot _off_fpu_dis. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
167 global _real_fpu_disabled
168_real_fpu_disabled:
169 mov.l %d0,-(%sp) # save d0
170 mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = fpu_dis slot contents
171 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
172 mov.l 0x4(%sp),%d0 # restore d0
173 rtd &0x4 # jump to pushed address and pop the save slot
174
# _real_trap: callout stub for slot _off_trap. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
175 global _real_trap
176_real_trap:
177 mov.l %d0,-(%sp) # save d0
178 mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = trap slot contents
179 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
180 mov.l 0x4(%sp),%d0 # restore d0
181 rtd &0x4 # jump to pushed address and pop the save slot
182
# _real_trace: callout stub taken by _fpsp_ovfl's fmove-out path when the
# stacked SR has trace set. Jumps to (_060FPSP_TABLE-0x80) + the longword in
# slot _off_trace; %d0 and the stack pointer are preserved.
183 global _real_trace
184_real_trace:
185 mov.l %d0,-(%sp) # save d0
186 mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = trace slot contents
187 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
188 mov.l 0x4(%sp),%d0 # restore d0
189 rtd &0x4 # jump to pushed address and pop the save slot
190
# _real_access: callout stub for slot _off_access. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot, preserving %d0
# and leaving the stack pointer as it was on entry.
191 global _real_access
192_real_access:
193 mov.l %d0,-(%sp) # save d0
194 mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = access slot contents
195 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
196 mov.l 0x4(%sp),%d0 # restore d0
197 rtd &0x4 # jump to pushed address and pop the save slot
198
199#######################################
200
# _imem_read: memory-access callout stub for slot _off_imr. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot. All registers,
# including %d0, reach the target as the caller set them; the caller's
# return address stays on top of the stack, so the target returns directly
# to the caller.
201 global _imem_read
202_imem_read:
203 mov.l %d0,-(%sp) # save d0 (may carry an argument)
204 mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = imr slot contents
205 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
206 mov.l 0x4(%sp),%d0 # restore d0
207 rtd &0x4 # jump to pushed address and pop the save slot
208
# _dmem_read: memory-access callout stub for slot _off_dmr. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
209 global _dmem_read
210_dmem_read:
211 mov.l %d0,-(%sp) # save d0 (may carry an argument)
212 mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = dmr slot contents
213 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
214 mov.l 0x4(%sp),%d0 # restore d0
215 rtd &0x4 # jump to pushed address and pop the save slot
216
# _dmem_write: memory-access callout stub for slot _off_dmw. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
217 global _dmem_write
218_dmem_write:
219 mov.l %d0,-(%sp) # save d0 (may carry an argument)
220 mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = dmw slot contents
221 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
222 mov.l 0x4(%sp),%d0 # restore d0
223 rtd &0x4 # jump to pushed address and pop the save slot
224
# _imem_read_word: memory-access callout stub for slot _off_irw. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
225 global _imem_read_word
226_imem_read_word:
227 mov.l %d0,-(%sp) # save d0 (may carry an argument)
228 mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = irw slot contents
229 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
230 mov.l 0x4(%sp),%d0 # restore d0
231 rtd &0x4 # jump to pushed address and pop the save slot
232
# _imem_read_long: memory-access callout stub for slot _off_irl ("read
# instruction longword"). _fpsp_ovfl calls it with the instruction address
# in %a0 and uses the value that comes back in %d0. The stub itself only
# redirects: it jumps to (_060FPSP_TABLE-0x80) + the longword stored in the
# slot with all registers as the caller set them, and the target returns
# directly to the stub's caller.
233 global _imem_read_long
234_imem_read_long:
235 mov.l %d0,-(%sp) # save d0 while it is used as scratch
236 mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = irl slot contents
237 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
238 mov.l 0x4(%sp),%d0 # restore d0
239 rtd &0x4 # jump to pushed address and pop the save slot
240
# _dmem_read_byte: memory-access callout stub for slot _off_drb. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
241 global _dmem_read_byte
242_dmem_read_byte:
243 mov.l %d0,-(%sp) # save d0 (may carry an argument)
244 mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = drb slot contents
245 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
246 mov.l 0x4(%sp),%d0 # restore d0
247 rtd &0x4 # jump to pushed address and pop the save slot
248
# _dmem_read_word: memory-access callout stub for slot _off_drw. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
249 global _dmem_read_word
250_dmem_read_word:
251 mov.l %d0,-(%sp) # save d0 (may carry an argument)
252 mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = drw slot contents
253 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
254 mov.l 0x4(%sp),%d0 # restore d0
255 rtd &0x4 # jump to pushed address and pop the save slot
256
# _dmem_read_long: memory-access callout stub for slot _off_drl. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
257 global _dmem_read_long
258_dmem_read_long:
259 mov.l %d0,-(%sp) # save d0 (may carry an argument)
260 mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = drl slot contents
261 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
262 mov.l 0x4(%sp),%d0 # restore d0
263 rtd &0x4 # jump to pushed address and pop the save slot
264
# _dmem_write_byte: memory-access callout stub for slot _off_dwb. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
265 global _dmem_write_byte
266_dmem_write_byte:
267 mov.l %d0,-(%sp) # save d0 (may carry an argument)
268 mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = dwb slot contents
269 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
270 mov.l 0x4(%sp),%d0 # restore d0
271 rtd &0x4 # jump to pushed address and pop the save slot
272
# _dmem_write_word: memory-access callout stub for slot _off_dww. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
273 global _dmem_write_word
274_dmem_write_word:
275 mov.l %d0,-(%sp) # save d0 (may carry an argument)
276 mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = dww slot contents
277 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
278 mov.l 0x4(%sp),%d0 # restore d0
279 rtd &0x4 # jump to pushed address and pop the save slot
280
# _dmem_write_long: memory-access callout stub for slot _off_dwl. Jumps to
# (_060FPSP_TABLE-0x80) + the longword stored in that slot with all
# registers as the caller set them; the target returns directly to the
# stub's caller.
281 global _dmem_write_long
282_dmem_write_long:
283 mov.l %d0,-(%sp) # save d0 (may carry an argument)
284 mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = dwl slot contents
285 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push base + d0
286 mov.l 0x4(%sp),%d0 # restore d0
287 rtd &0x4 # jump to pushed address and pop the save slot
288
289#
290# This file contains a set of define statements for constants
291# in order to promote readability within the corecode itself.
292#
293
# Frame layout constants. Every offset below is used as a displacement from
# %a6 after the handlers execute "link.w %a6,&-LOCAL_SIZE": negative offsets
# lie inside the local frame, positive ones reach the exception stack frame
# that was already on the stack.
294set LOCAL_SIZE, 192 # stack frame size(bytes)
295set LV, -LOCAL_SIZE # stack offset
296
# Fields of the exception stack frame (above the saved a6 at offset 0).
297set EXC_SR, 0x4 # stack status register
298set EXC_PC, 0x6 # stack pc
299set EXC_VOFF, 0xa # stacked vector offset
300set EXC_EA, 0xc # stacked <ea>
301
302set EXC_FP, 0x0 # frame pointer
303
# Register save areas: data regs at -100..-69, address regs at -68..-37,
# fp regs at -36..-1 (8*4, 8*4 and 3*12 bytes respectively).
304set EXC_AREGS, -68 # offset of all address regs
305set EXC_DREGS, -100 # offset of all data regs
306set EXC_FPREGS, -36 # offset of all fp regs
307
308set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6 (index 6), not
# to a slot of its own - confirm against the code that uses it.
309set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
310set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
311set EXC_A5, EXC_AREGS+(5*4)
312set EXC_A4, EXC_AREGS+(4*4)
313set EXC_A3, EXC_AREGS+(3*4)
314set EXC_A2, EXC_AREGS+(2*4)
315set EXC_A1, EXC_AREGS+(1*4)
316set EXC_A0, EXC_AREGS+(0*4)
317set EXC_D7, EXC_DREGS+(7*4)
318set EXC_D6, EXC_DREGS+(6*4)
319set EXC_D5, EXC_DREGS+(5*4)
320set EXC_D4, EXC_DREGS+(4*4)
321set EXC_D3, EXC_DREGS+(3*4)
322set EXC_D2, EXC_DREGS+(2*4)
323set EXC_D1, EXC_DREGS+(1*4)
324set EXC_D0, EXC_DREGS+(0*4)
325
# Each saved fp register occupies 12 bytes.
326set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
327set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
328set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
329
# Offsets (from %a6) of the scratch operands and saved state that live in
# the lower part of the local frame. LV is the bottom of the frame, so
# LV+n is n bytes above it. Each fp operand area is 12 bytes with the
# sub-fields _EX (+0), _SGN (+2), _HI (+4) and _LO (+8).
330set FP_SCR1, LV+80 # fp scratch 1
331set FP_SCR1_EX, FP_SCR1+0
332set FP_SCR1_SGN, FP_SCR1+2
333set FP_SCR1_HI, FP_SCR1+4
334set FP_SCR1_LO, FP_SCR1+8
335
336set FP_SCR0, LV+68 # fp scratch 0
337set FP_SCR0_EX, FP_SCR0+0
338set FP_SCR0_SGN, FP_SCR0+2
339set FP_SCR0_HI, FP_SCR0+4
340set FP_SCR0_LO, FP_SCR0+8
341
342set FP_DST, LV+56 # fp destination operand
343set FP_DST_EX, FP_DST+0
344set FP_DST_SGN, FP_DST+2
345set FP_DST_HI, FP_DST+4
346set FP_DST_LO, FP_DST+8
347
# FP_SRC is also where the handlers fsave/frestore the FPU state frame.
348set FP_SRC, LV+44 # fp source operand
349set FP_SRC_EX, FP_SRC+0
350set FP_SRC_SGN, FP_SRC+2
351set FP_SRC_HI, FP_SRC+4
352set FP_SRC_LO, FP_SRC+8
353
# USER_FPCR, USER_FPSR and USER_FPIAR are three consecutive longwords; the
# handlers save/restore all three with one fmovm.l based at USER_FPCR.
354set USER_FPIAR, LV+40 # FP instr address register
355
356set USER_FPSR, LV+36 # FP status register
357set FPSR_CC, USER_FPSR+0 # FPSR condition codes
358set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
359set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
360set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
361
362set USER_FPCR, LV+32 # FP control register
363set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
364set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
365
366set L_SCR3, LV+28 # integer scratch 3
367set L_SCR2, LV+24 # integer scratch 2
368set L_SCR1, LV+20 # integer scratch 1
369
370set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
371
# NOTE(review): EXC_TEMP2 is the same offset as L_SCR2 (LV+24), so the two
# names alias one longword - confirm they are never live at the same time.
372set EXC_TEMP2, LV+24 # temporary space
373set EXC_TEMP, LV+16 # temporary space
374
375set DTAG, LV+15 # destination operand type
376set STAG, LV+14 # source operand type
377
378set SPCOND_FLG, LV+10 # flag: special case (see below)
379
380set EXC_CC, LV+8 # saved condition codes
381set EXC_EXTWPTR, LV+4 # saved current PC (active)
# EXC_EXTWORD and EXC_CMDREG are two names for the same word (LV+2), which
# is the low half of the longword stored at EXC_OPWORD.
382set EXC_EXTWORD, LV+2 # saved extension word
383set EXC_CMDREG, LV+2 # saved extension word
384set EXC_OPWORD, LV+0 # saved operation word
385
386################################
387
388# Helpful macros
389
# Field offsets within a 12-byte extended-precision operand image in memory.
# FTEMP_*, LOCAL_*, DST_* and SRC_* are parallel name sets with identical
# values; the _GRS offset (12) is the first byte past the 12-byte image.
390set FTEMP, 0 # offsets within an
391set FTEMP_EX, 0 # extended precision
392set FTEMP_SGN, 2 # value saved in memory.
393set FTEMP_HI, 4
394set FTEMP_LO, 8
395set FTEMP_GRS, 12
396
397set LOCAL, 0 # offsets within an
398set LOCAL_EX, 0 # extended precision
399set LOCAL_SGN, 2 # value saved in memory.
400set LOCAL_HI, 4
401set LOCAL_LO, 8
402set LOCAL_GRS, 12
403
404set DST, 0 # offsets within an
405set DST_EX, 0 # extended precision
406set DST_HI, 4 # value saved in memory.
407set DST_LO, 8
408
409set SRC, 0 # offsets within an
410set SRC_EX, 0 # extended precision
411set SRC_HI, 4 # value saved in memory.
412set SRC_LO, 8
413
# Exponent limits for each precision. The sgl/dbl values equal
# EXT_BIAS - (format bias - 1) and EXT_BIAS + format bias, i.e. they are
# expressed on the extended-precision biased scale.
414set SGL_LO, 0x3f81 # min sgl prec exponent
415set SGL_HI, 0x407e # max sgl prec exponent
416set DBL_LO, 0x3c01 # min dbl prec exponent
417set DBL_HI, 0x43fe # max dbl prec exponent
418set EXT_LO, 0x0 # min ext prec exponent
419set EXT_HI, 0x7ffe # max ext prec exponent
420
421set EXT_BIAS, 0x3fff # extended precision bias
422set SGL_BIAS, 0x007f # single precision bias
423set DBL_BIAS, 0x03ff # double precision bias
424
# Operand type tags: the values written to STAG/DTAG by the handlers
# (from the result of set_tag_x) and compared against, e.g., UNNORM.
425set NORM, 0x00 # operand type for STAG/DTAG
426set ZERO, 0x01 # operand type for STAG/DTAG
427set INF, 0x02 # operand type for STAG/DTAG
428set QNAN, 0x03 # operand type for STAG/DTAG
429set DENORM, 0x04 # operand type for STAG/DTAG
430set SNAN, 0x05 # operand type for STAG/DTAG
431set UNNORM, 0x06 # operand type for STAG/DTAG
432
433##################
434# FPSR/FPCR bits #
435##################
# Bit numbers (for btst/bset/bclr on a single byte). The four groups below
# index the condition-code byte, the quotient byte, the exception
# status/enable byte and the accrued-exception byte respectively.
436set neg_bit, 0x3 # negative result
437set z_bit, 0x2 # zero result
438set inf_bit, 0x1 # infinite result
439set nan_bit, 0x0 # NAN result
440
441set q_sn_bit, 0x7 # sign bit of quotient byte
442
# These bit numbers are used against FPCR_ENABLE by the handlers
# (e.g. "btst &ovfl_bit,FPCR_ENABLE(%a6)").
443set bsun_bit, 7 # branch on unordered
444set snan_bit, 6 # signalling NAN
445set operr_bit, 5 # operand error
446set ovfl_bit, 4 # overflow
447set unfl_bit, 3 # underflow
448set dz_bit, 2 # divide by zero
449set inex2_bit, 1 # inexact result 2
450set inex1_bit, 0 # inexact result 1
451
452set aiop_bit, 7 # accrued invalid operation bit
453set aovfl_bit, 6 # accrued overflow bit
454set aunfl_bit, 5 # accrued underflow bit
455set adz_bit, 4 # accrued dz bit
456set ainex_bit, 3 # accrued inexact bit
457
458#############################
459# FPSR individual bit masks #
460#############################
# Longword masks position each flag within the full 32-bit FPSR image;
# the *_bmask values are the same condition-code flags within their byte.
461set neg_mask, 0x08000000 # negative bit mask (lw)
462set inf_mask, 0x02000000 # infinity bit mask (lw)
463set z_mask, 0x04000000 # zero bit mask (lw)
464set nan_mask, 0x01000000 # nan bit mask (lw)
465
466set neg_bmask, 0x08 # negative bit mask (byte)
467set inf_bmask, 0x02 # infinity bit mask (byte)
468set z_bmask, 0x04 # zero bit mask (byte)
469set nan_bmask, 0x01 # nan bit mask (byte)
470
# Exception status flags occupy bits 15-8 of the longword.
471set bsun_mask, 0x00008000 # bsun exception mask
472set snan_mask, 0x00004000 # snan exception mask
473set operr_mask, 0x00002000 # operr exception mask
474set ovfl_mask, 0x00001000 # overflow exception mask
475set unfl_mask, 0x00000800 # underflow exception mask
476set dz_mask, 0x00000400 # dz exception mask
477set inex2_mask, 0x00000200 # inex2 exception mask
478set inex1_mask, 0x00000100 # inex1 exception mask
479
# Accrued exception flags occupy bits 7-3 of the longword.
480set aiop_mask, 0x00000080 # accrued illegal operation
481set aovfl_mask, 0x00000040 # accrued overflow
482set aunfl_mask, 0x00000020 # accrued underflow
483set adz_mask, 0x00000010 # accrued divide by zero
484set ainex_mask, 0x00000008 # accrued inexact
485
486######################################
487# FPSR combinations used in the FPSP #
488######################################
# Each combination is the sum of distinct single-bit masks, so "+" here is
# equivalent to a bitwise OR. Unlike the others, nzi_mask is meant to be
# ANDed in: it keeps everything except the N, Z and I condition codes.
489set dzinf_mask, inf_mask+dz_mask+adz_mask
490set opnan_mask, nan_mask+operr_mask+aiop_mask
491set nzi_mask, 0x01ffffff #clears N, Z, and I
492set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
493set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
494set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
495set inx1a_mask, inex1_mask+ainex_mask
496set inx2a_mask, inex2_mask+ainex_mask
497set snaniop_mask, nan_mask+snan_mask+aiop_mask
498set snaniop2_mask, snan_mask+aiop_mask
499set naniop_mask, nan_mask+aiop_mask
500set neginf_mask, neg_mask+inf_mask
501set infaiop_mask, inf_mask+aiop_mask
502set negz_mask, neg_mask+z_mask
503set opaop_mask, operr_mask+aiop_mask
504set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
505set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
506
507#########
508# misc. #
509#########
510set rnd_stky_bit, 29 # stky bit pos in longword
511
512set sign_bit, 0x7 # sign bit
513set signan_bit, 0x6 # signalling nan bit
514
# Same values as SGL_LO and DBL_LO.
515set sgl_thresh, 0x3f81 # minimum sgl exponent
516set dbl_thresh, 0x3c01 # minimum dbl exponent
517
# Rounding precision and rounding mode encodings. The handlers pass the
# FPCR_MODE byte in d0 as "rnd prec/mode"; the precision values are
# multiples of 4 and the mode values fit in the two low bits.
518set x_mode, 0x0 # extended precision
519set s_mode, 0x4 # single precision
520set d_mode, 0x8 # double precision
521
522set rn_mode, 0x0 # round-to-nearest
523set rz_mode, 0x1 # round-to-zero
524set rm_mode, 0x2 # round-to-minus-infinity
525set rp_mode, 0x3 # round-to-plus-infinity
526
527set mantissalen, 64 # length of mantissa in bits
528
529set BYTE, 1 # len(byte) == 1 byte
530set WORD, 2 # len(word) == 2 bytes
531set LONG, 4 # len(longword) == 4 bytes
532
# Vector offsets for the FP exceptions, 4 bytes apart. _fpsp_ovfl writes the
# literal 0xc4 (== INEX_VEC) into the stacked vector offset before calling
# _real_inex.
533set BSUN_VEC, 0xc0 # bsun vector offset
534set INEX_VEC, 0xc4 # inexact vector offset
535set DZ_VEC, 0xc8 # dz vector offset
536set UNFL_VEC, 0xcc # unfl vector offset
537set OPERR_VEC, 0xd0 # operr vector offset
538set OVFL_VEC, 0xd4 # ovfl vector offset
539set SNAN_VEC, 0xd8 # snan vector offset
540
541###########################
542# SPecial CONDition FLaGs #
543###########################
# Values for the SPCOND_FLG byte. Each *_flg is a byte mask and the
# matching *_bit is its bit number (flg == 1 << bit). fmovm_flg (0x40) has
# no matching *_bit constant in this list.
544set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
545set fbsun_flg, 0x02 # flag bit: bsun exception
546set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
547set mda7_flg, 0x08 # flag bit: -(a7) <ea>
548set fmovm_flg, 0x40 # flag bit: fmovm instruction
549set immed_flg, 0x80 # flag bit: &<data> <ea>
550
551set ftrapcc_bit, 0x0
552set fbsun_bit, 0x1
553set mia7_bit, 0x2
554set mda7_bit, 0x3
555set immed_bit, 0x7
556
557##################################
558# TRANSCENDENTAL "LAST-OP" FLAGS #
559##################################
# Small enumeration naming which arithmetic instruction was performed last.
# No use of these values is visible in this part of the file.
560set FMUL_OP, 0x0 # fmul instr performed last
561set FDIV_OP, 0x1 # fdiv performed last
562set FADD_OP, 0x2 # fadd performed last
563set FMOV_OP, 0x3 # fmov performed last
564
565#############
566# CONSTANTS #
567#############
# Read-only numeric constants emitted inline as longwords.
# NOTE(review): T1/T2 look like a double-precision lead/trail pair for
# 16381*log(2) - confirm against the routines that load them.
568T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
569T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
570
# NOTE(review): PI and PIBY2 are four longwords each; the first three match
# the 12-byte extended-precision layout used elsewhere in this file (exponent
# word, then 64-bit mantissa 0xC90FDAA2_2168C235), followed by one zero
# longword - confirm how the fourth longword is used.
571PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
572PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
573
# NOTE(review): two longwords; presumably 2/pi in double precision - confirm.
574TWOBYPI:
575 long 0x3FE45F30,0x6DC9C883
576
577#########################################################################
578# XDEF **************************************************************** #
579# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
580# #
581# This handler should be the first code executed upon taking the #
582# FP Overflow exception in an operating system. #
583# #
584# XREF **************************************************************** #
585# _imem_read_long() - read instruction longword #
586# fix_skewed_ops() - adjust src operand in fsave frame #
587# set_tag_x() - determine optype of src/dst operands #
588# store_fpreg() - store opclass 0 or 2 result to FP regfile #
589# unnorm_fix() - change UNNORM operands to NORM or ZERO #
590# load_fpn2() - load dst operand from FP regfile #
591# fout() - emulate an opclass 3 instruction #
592# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
593# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
594# _real_ovfl() - "callout" for Overflow exception enabled code #
595# _real_inex() - "callout" for Inexact exception enabled code #
596# _real_trace() - "callout" for Trace exception code #
597# #
598# INPUT *************************************************************** #
599# - The system stack contains the FP Ovfl exception stack frame #
600# - The fsave frame contains the source operand #
601# #
602# OUTPUT ************************************************************** #
603# Overflow Exception enabled: #
604# - The system stack is unchanged #
605# - The fsave frame contains the adjusted src op for opclass 0,2 #
606# Overflow Exception disabled: #
607# - The system stack is unchanged #
608# - The "exception present" flag in the fsave frame is cleared #
609# #
610# ALGORITHM *********************************************************** #
611# On the 060, if an FP overflow is present as the result of any #
612# instruction, the 060 will take an overflow exception whether the #
613# exception is enabled or disabled in the FPCR. For the disabled case, #
614# This handler emulates the instruction to determine what the correct #
615# default result should be for the operation. This default result is #
616# then stored in either the FP regfile, data regfile, or memory. #
617# Finally, the handler exits through the "callout" _fpsp_done() #
618# denoting that no exceptional conditions exist within the machine. #
619# If the exception is enabled, then this handler must create the #
620# exceptional operand and place it in the fsave state frame, and store #
621# the default result (only if the instruction is opclass 3). For #
622# exceptions enabled, this handler must exit through the "callout" #
623# _real_ovfl() so that the operating system enabled overflow handler #
624# can handle this case. #
625# Two other conditions exist. First, if overflow was disabled #
626# but the inexact exception was enabled, this handler must exit #
627# through the "callout" _real_inex() regardless of whether the result #
628# was inexact. #
629# Also, in the case of an opclass three instruction where #
630# overflow was disabled and the trace exception was enabled, this #
631# handler must exit through the "callout" _real_trace(). #
632# #
633#########################################################################
634
# _fpsp_ovfl: entry for the FP overflow exception (main path).
# Builds a local frame, saves the FPU state frame, d0-d1/a0-a1, the FP
# control registers and fp0-fp1, fetches the faulting instruction and then:
#   - fmove-out (opclass 3) instructions are handed to fovfl_out;
#   - otherwise the operation is re-run through the tbl_unsupp routine
#     selected by the extension field, and the result is stored with
#     store_fpreg.
# Exits: fovfl_ovfl_on if overflow is enabled in the saved FPCR, else
# fovfl_inex_on if inexact is enabled, else restores everything and leaves
# through _fpsp_done.
635 global _fpsp_ovfl
636_fpsp_ovfl:
637
638#$# sub.l &24,%sp # make room for src/dst
639
640 link.w %a6,&-LOCAL_SIZE # init stack frame
641
642 fsave FP_SRC(%a6) # grab the "busy" frame
643
644 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
645 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
647
648# the FPIAR holds the "current PC" of the faulting instruction
649 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
651 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
652 bsr.l _imem_read_long # fetch the instruction words
653 mov.l %d0,EXC_OPWORD(%a6) # opword at EXC_OPWORD, extension word at EXC_CMDREG
654
655##############################################################################
656
# bit 5 of the first (high) byte of the extension word
657 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
658 bne.w fovfl_out
659
660
661 lea FP_SRC(%a6),%a0 # pass: ptr to src op
662 bsr.l fix_skewed_ops # fix src op
663
664# since, I believe, only NORMs and DENORMs can come through here,
665# maybe we can avoid the subroutine call.
666 lea FP_SRC(%a6),%a0 # pass: ptr to src op
667 bsr.l set_tag_x # tag the operand type
668 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
669
670# bit five of the fp extension word separates the monadic and dyadic operations
671# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672# will never take this exception.
673 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
674 beq.b fovfl_extract # monadic
675
676 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677 bsr.l load_fpn2 # load dst into FP_DST
678
679 lea FP_DST(%a6),%a0 # pass: ptr to dst op
680 bsr.l set_tag_x # tag the operand type
681 cmpi.b %d0,&UNNORM # is operand an UNNORM?
682 bne.b fovfl_op2_done # no
683 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
684fovfl_op2_done:
685 mov.b %d0,DTAG(%a6) # save dst optype tag
686
687fovfl_extract:
688
689#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
695
696 clr.l %d0
697 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
698
699 mov.b 1+EXC_CMDREG(%a6),%d1
700 andi.w &0x007f,%d1 # extract extension
701
# the mask keeps the quotient byte, the inex1 flag and the accrued byte of
# the saved FPSR; the condition codes and the other exception flags are
# cleared.
702 andi.l &0x00ff01ff,USER_FPSR(%a6) # keep quotient, inex1, accrued fields
703
704 fmov.l &0x0,%fpcr # zero current control regs
705 fmov.l &0x0,%fpsr
706
707 lea FP_SRC(%a6),%a0
708 lea FP_DST(%a6),%a1
709
710# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# tbl_unsupp holds one longword per extension value; each entry is an
# offset from tbl_unsupp itself to the emulation routine.
711 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712 jsr (tbl_unsupp.l,%pc,%d1.l*1)
713
714# the operation has been emulated. the result is in fp0.
715# the EXOP, if an exception occurred, is in fp1.
716# we must save the default result regardless of whether
717# traps are enabled or disabled.
718 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # same field as used for load_fpn2 above: dst reg
719 bsr.l store_fpreg
720
721# the exceptional possibilities we have left ourselves with are ONLY overflow
722# and inexact. and, the inexact is such that overflow occurred and was disabled
723# but inexact was enabled.
724 btst &ovfl_bit,FPCR_ENABLE(%a6)
725 bne.b fovfl_ovfl_on
726
727 btst &inex2_bit,FPCR_ENABLE(%a6)
728 bne.b fovfl_inex_on
729
# neither exception is enabled: undo the saves made at entry and finish.
730 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
731 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
733
734 unlk %a6
735#$# add.l &24,%sp
736 bra.l _fpsp_done
737
738# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739# in fp1. now, simply jump to _real_ovfl()!
# Reached from both the emulation path and fovfl_out. The EXOP is written
# over the operand at the start of the state frame at FP_SRC, a status word
# is stored at offset 2 of that frame, the saved registers are restored and
# the frame is loaded back with frestore before leaving.
740fovfl_ovfl_on:
741 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
742
743 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
744
745 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
746 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
748
749 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
750
751 unlk %a6 # drop the local frame; exception frame is on top again
752
753 bra.l _real_ovfl
754
755# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756# we must jump to real_inex().
# Same sequence as fovfl_ovfl_on, except that the low byte of the stacked
# vector offset is rewritten to 0xc4 (the value of INEX_VEC) and the status
# word stored into the frame is 0xe001.
757fovfl_inex_on:
758
759 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
760
761 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
762 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
763
764 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
765 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
767
768 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
769
770 unlk %a6 # drop the local frame; exception frame is on top again
771
772 bra.l _real_inex
773
774########################################################################
# fovfl_out: overflow on an fmove-out (opclass 3) instruction. The source is
# tagged NORM and the instruction is emulated by fout(). Afterwards the same
# enabled-exception checks as in the main path are made; if neither overflow
# nor inexact is enabled the saved state is restored and the handler leaves
# through _fpsp_done, or through _real_trace when trace is set in the
# stacked SR.
775fovfl_out:
776
777
778#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
781
782# the src operand is definitely a NORM(!), so tag it as such
783 mov.b &NORM,STAG(%a6) # set src optype tag
784
785 clr.l %d0
786 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
787
# the mask clears only bits 15-8 of the saved FPSR (the exception status
# byte); condition codes, quotient byte and accrued byte are kept.
788 and.l &0xffff00ff,USER_FPSR(%a6) # clear exception status byte only
789
790 fmov.l &0x0,%fpcr # zero current control regs
791 fmov.l &0x0,%fpsr
792
793 lea FP_SRC(%a6),%a0 # pass ptr to src operand
794
795 bsr.l fout
796
797 btst &ovfl_bit,FPCR_ENABLE(%a6)
798 bne.w fovfl_ovfl_on
799
800 btst &inex2_bit,FPCR_ENABLE(%a6)
801 bne.w fovfl_inex_on
802
803 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
804 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
806
807 unlk %a6
808#$# add.l &24,%sp
809
# after unlk, (%sp) is the stacked SR (EXC_SR), 0x6(%sp) the stacked
# format/vector word (EXC_VOFF) and 0x8(%sp) the stacked <ea> (EXC_EA).
810 btst &0x7,(%sp) # is trace on?
811 beq.l _fpsp_done # no
812
813 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
814 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
815 bra.l _real_trace
816
817#########################################################################
818# XDEF **************************************************************** #
819# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
820# #
821# This handler should be the first code executed upon taking the #
822# FP Underflow exception in an operating system. #
823# #
824# XREF **************************************************************** #
825# _imem_read_long() - read instruction longword #
826# fix_skewed_ops() - adjust src operand in fsave frame #
827# set_tag_x() - determine optype of src/dst operands #
828# store_fpreg() - store opclass 0 or 2 result to FP regfile #
829# unnorm_fix() - change UNNORM operands to NORM or ZERO #
830# load_fpn2() - load dst operand from FP regfile #
831# fout() - emulate an opclass 3 instruction #
832# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
833# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
834# _real_unfl() - "callout" for Underflow exception enabled code #
835# _real_inex() - "callout" for Inexact exception enabled code #
836# _real_trace() - "callout" for Trace exception code #
837# #
838# INPUT *************************************************************** #
839# - The system stack contains the FP Unfl exception stack frame #
840# - The fsave frame contains the source operand #
841# #
842# OUTPUT ************************************************************** #
843# Underflow Exception enabled: #
844# - The system stack is unchanged #
845# - The fsave frame contains the adjusted src op for opclass 0,2 #
846# Underflow Exception disabled: #
847# - The system stack is unchanged #
848# - The "exception present" flag in the fsave frame is cleared #
849# #
850# ALGORITHM *********************************************************** #
851# On the 060, if an FP underflow is present as the result of any #
852# instruction, the 060 will take an underflow exception whether the #
853# exception is enabled or disabled in the FPCR. For the disabled case, #
854# This handler emulates the instruction to determine what the correct #
855# default result should be for the operation. This default result is #
856# then stored in either the FP regfile, data regfile, or memory. #
857# Finally, the handler exits through the "callout" _fpsp_done() #
858# denoting that no exceptional conditions exist within the machine. #
859# If the exception is enabled, then this handler must create the #
860# exceptional operand and place it in the fsave state frame, and store #
861# the default result (only if the instruction is opclass 3). For #
862# exceptions enabled, this handler must exit through the "callout" #
863# _real_unfl() so that the operating system enabled underflow handler #
864# can handle this case. #
865# Two other conditions exist. First, if underflow was disabled #
866# but the inexact exception was enabled and the result was inexact, #
867# this handler must exit through the "callout" _real_inex(). #
868# #
869# Also, in the case of an opclass three instruction where #
870# underflow was disabled and the trace exception was enabled, this #
871# handler must exit through the "callout" _real_trace(). #
872# #
873#########################################################################
874
# _fpsp_unfl(): handler entry for the FP underflow exception.
# Builds a local frame, fsaves the FPU state into FP_SRC and saves
# d0-d1/a0-a1, the FP control regs and fp0-fp1. It then fetches the
# faulting instruction through the FPIAR and either branches to
# funfl_out (fmove out) or re-runs the operation through the routine
# selected from tbl_unsupp and stores the result with store_fpreg.
# Exits: _fpsp_done, or funfl_unfl_on / funfl_inex_on when the matching
# exception is enabled in the user's FPCR.
875 global _fpsp_unfl
876_fpsp_unfl:
877
878#$# sub.l &24,%sp # make room for src/dst
879
880 link.w %a6,&-LOCAL_SIZE # init stack frame
881
882 fsave FP_SRC(%a6) # grab the "busy" frame
883
884 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
885 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
887
888# the FPIAR holds the "current PC" of the faulting instruction
889 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
891 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
892 bsr.l _imem_read_long # fetch the instruction words
893 mov.l %d0,EXC_OPWORD(%a6) # store the fetched longword
894
895##############################################################################
896
897 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
898 bne.w funfl_out
899
900
901 lea FP_SRC(%a6),%a0 # pass: ptr to src op
902 bsr.l fix_skewed_ops # fix src op
903
904 lea FP_SRC(%a6),%a0 # pass: ptr to src op
905 bsr.l set_tag_x # tag the operand type
906 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
907
908# bit five of the fp ext word separates the monadic and dyadic operations
909# that can pass through fpsp_unfl(). remember that fcmp, and ftst
910# will never take this exception.
911 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
912 beq.b funfl_extract # monadic
913
914# now, what's left that's not dyadic is fsincos. we can distinguish it
915# from all dyadics by the '0110xxx' pattern (bit four set as well).
916 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
917 bne.b funfl_extract # yes
918
919 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920 bsr.l load_fpn2 # load dst into FP_DST
921
922 lea FP_DST(%a6),%a0 # pass: ptr to dst op
923 bsr.l set_tag_x # tag the operand type
924 cmpi.b %d0,&UNNORM # is operand an UNNORM?
925 bne.b funfl_op2_done # no
926 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
927funfl_op2_done:
928 mov.b %d0,DTAG(%a6) # save dst optype tag
929
930funfl_extract:
931
932#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
938
939 clr.l %d0
940 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
941
942 mov.b 1+EXC_CMDREG(%a6),%d1
943 andi.w &0x007f,%d1 # extract extension
944
945 andi.l &0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24 and 15-9 of saved FPSR
946
947 fmov.l &0x0,%fpcr # zero current control regs
948 fmov.l &0x0,%fpsr
949
950 lea FP_SRC(%a6),%a0 # pass: ptr to src op
951 lea FP_DST(%a6),%a1 # pass: ptr to dst op
952
953# maybe we can make these entry points ONLY the OVFL entry points of each routine.
954 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine (table entry is an offset from tbl_unsupp)
956
957 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst reg number
958 bsr.l store_fpreg # store the result
959
960# The `060 FPU multiplier hardware is such that if the result of a
961# multiply operation is the smallest possible normalized number
962# (0x00000000_80000000_00000000), then the machine will take an
963# underflow exception. Since this is incorrect, we need to check
964# if our emulation, after re-doing the operation, decided that
965# no underflow was called for. We do these checks only in
966# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967# special case will simply exit gracefully with the correct result.
968
969# the exceptional possibilities we have left ourselves with are ONLY underflow
970# and inexact. and, the inexact is such that underflow occurred and was disabled
971# but inexact was enabled.
972 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
973 bne.b funfl_unfl_on # yes
974
975funfl_chkinex:
976 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
977 bne.b funfl_inex_on # yes
978
979funfl_exit:
980 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
981 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
983
984 unlk %a6
985#$# add.l &24,%sp
986 bra.l _fpsp_done
987
988# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
989# in fp1 (don't forget to save fp0). what to do now?
990# well, we simply have to get to go to _real_unfl()!
991funfl_unfl_on:
992
993# The `060 FPU multiplier hardware is such that if the result of a
994# multiply operation is the smallest possible normalized number
995# (0x00000000_80000000_00000000), then the machine will take an
996# underflow exception. Since this is incorrect, we check here to see
997# if our emulation, after re-doing the operation, decided that
998# no underflow was called for.
999 btst &unfl_bit,FPSR_EXCEPT(%a6) # did emulation signal underflow?
1000 beq.w funfl_chkinex # no; go check inexact instead
1001
# funfl_unfl_on2 is also entered directly from funfl_out (fmove out path).
1002funfl_unfl_on2:
1003 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1004
1005 mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1006
1007 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1008 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1010
1011 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1012
1013 unlk %a6
1014
1015 bra.l _real_unfl
1016
1017# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018# we must jump to _real_inex().
1019funfl_inex_on:
1020
1021# The `060 FPU multiplier hardware is such that if the result of a
1022# multiply operation is the smallest possible normalized number
1023# (0x00000000_80000000_00000000), then the machine will take an
1024# underflow exception.
1025# But, whether bogus or not, if inexact is enabled AND it occurred,
1026# then we have to branch to _real_inex().
1027
1028 btst &inex2_bit,FPSR_EXCEPT(%a6) # did emulation signal inexact?
1029 beq.w funfl_exit # no; plain exit
1030
# funfl_inex_on2 is also entered directly from funfl_out (fmove out path).
1031funfl_inex_on2:
1032
1033 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1034
1035 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
1036 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1037
1038 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1039 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1041
1042 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1043
1044 unlk %a6
1045
1046 bra.l _real_inex
1047
1048#######################################################################
# funfl_out: underflow taken by an fmove out (reached from _fpsp_unfl).
# Calls fout() on the src operand in FP_SRC, then leaves through
# funfl_unfl_on2 / funfl_inex_on2 if that exception is enabled, otherwise
# through _fpsp_done, or _real_trace when the trace bit is set in the
# stacked SR.
1049funfl_out:
1050
1051
1052#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055
1056# the src operand is definitely a NORM(!), so tag it as such
1057 mov.b &NORM,STAG(%a6) # set src optype tag
1058
1059 clr.l %d0
1060 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1061
1062 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1063
1064 fmov.l &0x0,%fpcr # zero current control regs
1065 fmov.l &0x0,%fpsr
1066
1067 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1068
1069 bsr.l fout # call fmove out routine
1070
1071 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1072 bne.w funfl_unfl_on2 # yes
1073
1074 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1075 bne.w funfl_inex_on2 # yes
1076
1077 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1078 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1080
1081 unlk %a6
1082#$# add.l &24,%sp
1083
1084 btst &0x7,(%sp) # is trace on?
1085 beq.l _fpsp_done # no
1086
1087 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1088 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1089 bra.l _real_trace
1090
1091#########################################################################
1092# XDEF **************************************************************** #
1093# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1094# Data Type" exception. #
1095# #
1096# This handler should be the first code executed upon taking the #
1097# FP Unimplemented Data Type exception in an operating system. #
1098# #
1099# XREF **************************************************************** #
1100# _imem_read_{word,long}() - read instruction word/longword #
1101# fix_skewed_ops() - adjust src operand in fsave frame #
1102# set_tag_x() - determine optype of src/dst operands #
1103# store_fpreg() - store opclass 0 or 2 result to FP regfile #
1104# unnorm_fix() - change UNNORM operands to NORM or ZERO #
1105# load_fpn2() - load dst operand from FP regfile #
1106# load_fpn1() - load src operand from FP regfile #
1107# fout() - emulate an opclass 3 instruction #
1108# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1109# _real_inex() - "callout" to operating system inexact handler #
1110# _fpsp_done() - "callout" for exit; work all done #
1111# _real_trace() - "callout" for Trace enabled exception #
1112# funimp_skew() - adjust fsave src ops to "incorrect" value #
1113# _real_snan() - "callout" for SNAN exception #
1114# _real_operr() - "callout" for OPERR exception #
1115# _real_ovfl() - "callout" for OVFL exception #
1116# _real_unfl() - "callout" for UNFL exception #
1117# get_packed() - fetch packed operand from memory #
1118# #
1119# INPUT *************************************************************** #
1120# - The system stack contains the "Unimp Data Type" stk frame #
1121# - The fsave frame contains the src op (for UNNORM/DENORM) #
1122# #
1123# OUTPUT ************************************************************** #
1124# If Inexact exception (opclass 3): #
1125# - The system stack is changed to an Inexact exception stk frame #
1126# If SNAN exception (opclass 3): #
1127# - The system stack is changed to an SNAN exception stk frame #
1128# If OPERR exception (opclass 3): #
1129# - The system stack is changed to an OPERR exception stk frame #
1130# If OVFL exception (opclass 3): #
1131# - The system stack is changed to an OVFL exception stk frame #
1132# If UNFL exception (opclass 3): #
1133# - The system stack is changed to an UNFL exception stack frame #
1134# If Trace exception enabled: #
1135# - The system stack is changed to a Trace exception stack frame #
1136# Else: (normal case) #
1137# - Correct result has been stored as appropriate #
1138# #
1139# ALGORITHM *********************************************************** #
1140# Two main instruction types can enter here: (1) DENORM or UNNORM #
1141# unimplemented data types. These can be either opclass 0,2 or 3 #
1142# instructions, and (2) PACKED unimplemented data format instructions #
1143# also of opclasses 0,2, or 3. #
1144# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1145# operand from the fsave state frame and the dst operand (if dyadic) #
1146# from the FP register file. The instruction is then emulated by #
1147# choosing an emulation routine from a table of routines indexed by #
1148# instruction type. Once the instruction has been emulated and result #
1149# saved, then we check to see if any enabled exceptions resulted from #
1150# instruction emulation. If none, then we exit through the "callout" #
1151# _fpsp_done(). If there is an enabled FP exception, then we insert #
1152# this exception into the FPU in the fsave state frame and then exit #
1153# through _fpsp_done(). #
1154# PACKED opclass 0 and 2 is similar in how the instruction is #
1155# emulated and exceptions handled. The differences occur in how the #
1156# handler loads the packed op (by calling get_packed() routine) and #
1157# by the fact that a Trace exception could be pending for PACKED ops. #
1158# If a Trace exception is pending, then the current exception stack #
1159# frame is changed to a Trace exception stack frame and an exit is #
1160# made through _real_trace(). #
1161# For UNNORM/DENORM opclass 3, the actual move out to memory is #
1162# performed by calling the routine fout(). If no exception should occur #
1163# as the result of emulation, then an exit either occurs through #
1164# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1165# (a Trace stack frame must be created here, too). If an FP exception #
1166# should occur, then we must create an exception stack frame of that #
1167# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1168# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1169# emulation is performed in a similar manner. #
1170# #
1171#########################################################################
1172
1173#
1174# (1) DENORM and UNNORM (unimplemented) data types:
1175#
1176# post-instruction
1177# *****************
1178# * EA *
1179# pre-instruction * *
1180# ***************** *****************
1181# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1182# ***************** *****************
1183# * Next * * Next *
1184# * PC * * PC *
1185# ***************** *****************
1186# * SR * * SR *
1187# ***************** *****************
1188#
1189# (2) PACKED format (unsupported) opclasses two and three:
1190# *****************
1191# * EA *
1192# * *
1193# *****************
1194# * 0x2 * 0x0dc *
1195# *****************
1196# * Next *
1197# * PC *
1198# *****************
1199# * SR *
1200# *****************
1201#
# _fpsp_unsupp(): handler entry for the FP "Unimplemented Data Type"
# exception. This part saves state, records the pre-exception a7,
# fetches the faulting instruction and then dispatches:
#   - fmove out (opclass three)   -> fu_out
#   - packed opclass two          -> fu_in_pack
#   - otherwise: tag src/dst, call the tbl_unsupp routine, check the
#     enabled exceptions (fu_in_ena) and store the result.
1202 global _fpsp_unsupp
1203_fpsp_unsupp:
1204
1205 link.w %a6,&-LOCAL_SIZE # init stack frame
1206
1207 fsave FP_SRC(%a6) # save fp state
1208
1209 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1210 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1212
1213 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1214 bne.b fu_s
1215fu_u:
1216 mov.l %usp,%a0 # fetch user stack pointer
1217 mov.l %a0,EXC_A7(%a6) # save on stack
1218 bra.b fu_cont
1219# if the exception is an opclass zero or two unimplemented data type
1220# exception, then the a7' calculated here is wrong since it doesn't
1221# stack an ea. however, we don't need an a7' for this case anyways.
1222fu_s:
1223 lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1224 mov.l %a0,EXC_A7(%a6) # save on stack
1225
1226fu_cont:
1227
1228# the FPIAR holds the "current PC" of the faulting instruction
1229# the FPIAR should be set correctly for ALL exceptions passing through
1230# this point.
1231 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1232 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1233 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1234 bsr.l _imem_read_long # fetch the instruction words
1235 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1236
1237############################
1238
1239 clr.b SPCOND_FLG(%a6) # clear special condition flag
1240
1241# Separate opclass three (fpn-to-mem) ops since they have a different
1242# stack frame and protocol.
1243 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1244 bne.w fu_out # yes
1245
1246# Separate packed opclass two instructions.
1247 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1248 cmpi.b %d0,&0x13 # is it the packed opclass two encoding?
1249 beq.w fu_in_pack # yes
1250
1251
1252# I'm not sure at this point what FPSR bits are valid for this instruction.
1253# so, since the emulation routines re-create them anyways, zero exception field
1254 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1255
1256 fmov.l &0x0,%fpcr # zero current control regs
1257 fmov.l &0x0,%fpsr
1258
1259# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260# precision format if the src format was single or double and the
1261# source data type was an INF, NAN, DENORM, or UNNORM
1262 lea FP_SRC(%a6),%a0 # pass ptr to input
1263 bsr.l fix_skewed_ops
1264
1265# we don't know whether the src operand or the dst operand (or both) is the
1266# UNNORM or DENORM. call the function that tags the operand type. if the
1267# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1269 bsr.l set_tag_x # tag the operand type
1270 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1271 bne.b fu_op2 # no
1272 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1273
1274fu_op2:
1275 mov.b %d0,STAG(%a6) # save src optype tag
1276
1277 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1278
1279# bit five of the fp extension word separates the monadic and dyadic operations
1280# at this point
1281 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1282 beq.b fu_extract # monadic
1283 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1284 beq.b fu_extract # yes, so it's monadic, too
1285
1286 bsr.l load_fpn2 # load dst into FP_DST
1287
1288 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1289 bsr.l set_tag_x # tag the operand type
1290 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1291 bne.b fu_op2_done # no
1292 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1293fu_op2_done:
1294 mov.b %d0,DTAG(%a6) # save dst optype tag
1295
1296fu_extract:
1297 clr.l %d0
1298 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1299
1300 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1301
1302 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1303 lea FP_DST(%a6),%a1 # pass: ptr to dst op
1304
1305 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1306 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine (table entry is an offset from tbl_unsupp)
1307
1308#
1309# Exceptions in order of precedence:
1310# BSUN : none
1311# SNAN : all dyadic ops
1312# OPERR : fsqrt(-NORM)
1313# OVFL : all except ftst,fcmp
1314# UNFL : all except ftst,fcmp
1315# DZ : fdiv
1316# INEX2 : all except ftst,fcmp
1317# INEX1 : none (packed doesn't go through here)
1318#
1319
1320# we determine the highest priority exception(if any) set by the
1321# emulation routine that has also been enabled by the user.
1322 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1323 bne.b fu_in_ena # some are enabled
1324
1325fu_in_cont:
1326# fcmp and ftst do not store any result.
1327 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1328 andi.b &0x38,%d0 # extract bits 3-5
1329 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1330 beq.b fu_in_exit # yes
1331
1332 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1333 bsr.l store_fpreg # store the result
1334
1335fu_in_exit:
1336
1337 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1338 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1340
1341 unlk %a6
1342
1343 bra.l _fpsp_done
1344
# fu_in_ena: entered from _fpsp_unsupp with d0 = FPCR_ENABLE byte (non-zero).
# Finds the highest priority exception that is both enabled and set, then
# inserts the matching status word into the fsave frame (fu_in_exc_exit).
# When nothing matches, returns to fu_in_cont to store the result.
1345fu_in_ena:
1346 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1347 bfffo %d0{&24:&8},%d0 # find highest priority exception
1348 bne.b fu_in_exc # there is at least one set
1349
1350#
1351# No exceptions occurred that were also enabled. Now:
1352#
1353# if (OVFL && ovfl_disabled && inexact_enabled) {
1354# branch to _real_inex() (even if the result was exact!);
1355# } else {
1356# save the result in the proper fp reg (unless the op is fcmp or ftst);
1357# return;
1358# }
1359#
1360 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361 beq.b fu_in_cont # no
1362
1363fu_in_ovflchk:
1364 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365 beq.b fu_in_cont # no
1366 bra.w fu_in_exc_ovfl # go insert overflow frame
1367
1368#
1369# An exception occurred and that exception was enabled:
1370#
1371# shift enabled exception field into lo byte of d0;
1372# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374# /*
1375# * this is the case where we must call _real_inex() now or else
1376# * there will be no other way to pass it the exceptional operand
1377# */
1378# call _real_inex();
1379# } else {
1380# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381# }
1382#
1383fu_in_exc:
1384 subi.l &24,%d0 # fix offset to be 0-7
1385 cmpi.b %d0,&0x6 # is exception INEX? (6)
1386 bne.b fu_in_exc_exit # no
1387
1388# the enabled exception was inexact
1389 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390 bne.w fu_in_exc_unfl # yes
1391 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392 bne.w fu_in_exc_ovfl # yes
1393
1394# here, we insert the correct fsave status value into the fsave frame for the
1395# corresponding exception. the operand in the fsave frame should be the original
1396# src operand.
1397fu_in_exc_exit:
1398 mov.l %d0,-(%sp) # save d0
1399 bsr.l funimp_skew # skew sgl or dbl inputs
1400 mov.l (%sp)+,%d0 # restore d0
1401
1402 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403
1404 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1405 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1407
1408 frestore FP_SRC(%a6) # restore src op
1409
1410 unlk %a6
1411
1412 bra.l _fpsp_done
1413
# fsave status words, indexed by the exception number (0-7) held in d0.
1414tbl_except:
1415 short 0xe000,0xe006,0xe004,0xe005
1416 short 0xe003,0xe002,0xe001,0xe001
1417
# the two stubs below replace the INEX index with the index of the
# disabled underflow (4) or overflow (3) entry of tbl_except.
1418fu_in_exc_unfl:
1419 mov.w &0x4,%d0 # select tbl_except entry 4 (0xe003)
1420 bra.b fu_in_exc_exit
1421fu_in_exc_ovfl:
1422 mov.w &0x03,%d0 # select tbl_except entry 3 (0xe005)
1423 bra.b fu_in_exc_exit
1424
1425# If the input operand to this operation was opclass two and a single
1426# or double precision denorm, inf, or nan, the operand needs to be
1427# "corrected" in order to have the proper equivalent extended precision
1428# number.
#
# fix_skewed_ops():
#   in:  a0 = ptr to the operand to fix (callers pass FP_SRC(%a6))
#        EXC_CMDREG(%a6) = command word of the faulting instruction
#   out: operand at (a0) rewritten in place when it was skewed
#   d0 is overwritten. norm() is called for denorms and is relied upon
#   to leave a0 intact; its d0 result is used as the shift amount.
1429 global fix_skewed_ops
1430fix_skewed_ops:
1431 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1433 beq.b fso_sgl # yes
1434 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
1435 beq.b fso_dbl # yes
1436 rts # no
1437
# single precision source: only exponents $3f80 and $407f need fixing.
1438fso_sgl:
1439 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1440 andi.w &0x7fff,%d0 # strip sign
1441 cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
1442 beq.b fso_sgl_dnrm_zero # yes
1443 cmpi.w %d0,&0x407f # no; is |exp| == $407f?
1444 beq.b fso_infnan # yes
1445 rts # no
1446
1447fso_sgl_dnrm_zero:
1448 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449 beq.b fso_zero # it's a skewed zero
1450fso_sgl_dnrm:
1451# here, we count on norm not to alter a0...
1452 bsr.l norm # normalize mantissa
1453 neg.w %d0 # -shft amt
1454 addi.w &0x3f81,%d0 # adjust new exponent
1455 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1456 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1457 rts
1458
# shared by the sgl and dbl paths: keep only the sign bit.
1459fso_zero:
1460 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
1461 rts
1462
# shared by the sgl and dbl paths: force the extended INF/NAN exponent.
1463fso_infnan:
1464 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1465 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
1466 rts
1467
# double precision source: only exponents $3c00 and $43ff need fixing.
1468fso_dbl:
1469 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1470 andi.w &0x7fff,%d0 # strip sign
1471 cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
1472 beq.b fso_dbl_dnrm_zero # yes
1473 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
1474 beq.b fso_infnan # yes
1475 rts # no
1476
1477fso_dbl_dnrm_zero:
1478 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479 bne.b fso_dbl_dnrm # it's a skewed denorm
1480 tst.l LOCAL_LO(%a0) # is it a zero?
1481 beq.b fso_zero # yes
1482fso_dbl_dnrm:
1483# here, we count on norm not to alter a0...
1484 bsr.l norm # normalize mantissa
1485 neg.w %d0 # -shft amt
1486 addi.w &0x3c01,%d0 # adjust new exponent
1487 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1488 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1489 rts
1490
1491#################################################################
1492
1493# fmove out took an unimplemented data type exception.
1494# the src operand is in FP_SRC. Call fout() to write out the result and
1495# to determine which exceptions, if any, to take.
1496fu_out:
1497
1498# Separate packed move outs from the UNNORM and DENORM move outs.
1499 bfextu EXC_CMDREG(%a6){&3:&3},%d0 # extract 3-bit format field
1500 cmpi.b %d0,&0x3 # first packed encoding?
1501 beq.w fu_out_pack # yes
1502 cmpi.b %d0,&0x7 # second packed encoding?
1503 beq.w fu_out_pack # yes
1504
1505
1506# I'm not sure at this point what FPSR bits are valid for this instruction.
1507# so, since the emulation routines re-create them anyways, zero exception field.
1508# fmove out doesn't affect ccodes.
1509 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1510
1511 fmov.l &0x0,%fpcr # zero current control regs
1512 fmov.l &0x0,%fpsr
1513
1514# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515# call here. just figure out what it is...
1516 mov.w FP_SRC_EX(%a6),%d0 # get exponent
1517 andi.w &0x7fff,%d0 # strip sign
1518 beq.b fu_out_denorm # it's a DENORM
1519
1520 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1521 bsr.l unnorm_fix # yes; fix it
1522
1523 mov.b %d0,STAG(%a6) # save src optype tag returned in d0
1524
1525 bra.b fu_out_cont
1526fu_out_denorm:
1527 mov.b &DENORM,STAG(%a6) # tag src as DENORM
1528fu_out_cont:
1529
1530 clr.l %d0
1531 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1532
1533 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1534
1535 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1536 bsr.l fout # call fmove out routine
1537
1538# Exceptions in order of precedence:
1539# BSUN : none
1540# SNAN : none
1541# OPERR : fmove.{b,w,l} out of large UNNORM
1542# OVFL : fmove.{s,d}
1543# UNFL : fmove.{s,d,x}
1544# DZ : none
1545# INEX2 : all
1546# INEX1 : none (packed doesn't travel through here)
1547
1548# determine the highest priority exception(if any) set by the
1549# emulation routine that has also been enabled by the user.
1550 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1551 bne.w fu_out_ena # some are enabled
1552
1553fu_out_done:
1554
1555 mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1556
1557# on extended precision opclass three instructions using pre-decrement or
1558# post-increment addressing mode, the address register is not updated. if the
1559# address register was the stack pointer used from user mode, then let's update
1560# it here. if it was used from supervisor mode, then we have to handle this
1561# as a special case.
1562 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1563 bne.b fu_out_done_s # supervisor
1564
1565 mov.l EXC_A7(%a6),%a0 # restore a7
1566 mov.l %a0,%usp
1567
1568fu_out_done_cont:
1569 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1570 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1572
1573 unlk %a6
1574
1575 btst &0x7,(%sp) # is trace on?
1576 bne.b fu_out_trace # yes
1577
1578 bra.l _fpsp_done
1579
1580# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581# ("fmov.x fpm,-(a7)") if so, the exception frame is moved down by 0xc
1582fu_out_done_s:
# bytes and the result is copied by hand into the space it vacated.
1583 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1584 bne.b fu_out_done_cont # no; normal exit
1585
1586# the extended precision result is still in fp0. but, we need to save it
1587# somewhere on the stack until we can copy it to its final resting place.
1588# here, we're counting on the top of the stack to be the old place-holders
1589# for fp0/fp1 which have already been restored. that way, we can write
1590# over those destinations with the shifted stack frame.
1591 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1592
1593 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1594 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1596
1597 mov.l (%a6),%a6 # restore frame pointer
1598
# shift the SR/PC part of the exception frame down by 0xc bytes
1599 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1600 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1601
1602# now, copy the result to the proper place on the stack
1603 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606
1607 add.l &LOCAL_SIZE-0x8,%sp # drop the local frame; sp -> shifted exc frame
1608
1609 btst &0x7,(%sp) # is trace on?
1610 bne.b fu_out_trace # yes
1611
1612 bra.l _fpsp_done
1613
# fu_out_ena: entered from fu_out with d0 = FPCR_ENABLE byte (non-zero).
# Picks the highest priority exception that is both enabled and set.
1614fu_out_ena:
1615 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1616 bfffo %d0{&24:&8},%d0 # find highest priority exception
1617 bne.b fu_out_exc # there is at least one set
1618
1619# no exceptions were set.
1620# if a disabled overflow occurred and inexact was enabled but the result
1621# was exact, then a branch to _real_inex() is made.
1622 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623 beq.w fu_out_done # no
1624
1625fu_out_ovflchk:
1626 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627 beq.w fu_out_done # no
1628 bra.w fu_inex # yes
1629
1630#
1631# The fp move out that took the "Unimplemented Data Type" exception was
1632# being traced. Since the stack frames are similar, get the "current" PC
1633# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634#
1635# UNSUPP FRAME TRACE FRAME
1636# ***************** *****************
1637# * EA * * Current *
1638# * * * PC *
1639# ***************** *****************
1640# * 0x3 * 0x0dc * * 0x2 * 0x024 *
1641# ***************** *****************
1642# * Next * * Next *
1643# * PC * * PC *
1644# ***************** *****************
1645# * SR * * SR *
1646# ***************** *****************
1647#
1648fu_out_trace:
1649 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1650 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1651 bra.l _real_trace
1652
1653# an exception occurred and that exception was enabled.
# on entry d0 holds the bfffo result (24-31) from fu_out_ena.
1654fu_out_exc:
1655 subi.l &24,%d0 # fix offset to be 0-7
1656
1657# we don't mess with the existing fsave frame. just re-insert it and
1658# jump to the "_real_{}()" handler...
1659 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset
1660 jmp (tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
1661
1662 swbeg &0x8
# 16-bit offsets from tbl_fu_out, indexed by exception number (0-7).
# entries marked "can't happen" hold offset zero.
1663tbl_fu_out:
1664 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1665 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1666 short fu_operr - tbl_fu_out # OPERR
1667 short fu_ovfl - tbl_fu_out # OVFL
1668 short fu_unfl - tbl_fu_out # UNFL
1669 short tbl_fu_out - tbl_fu_out # DZ can't happen
1670 short fu_inex - tbl_fu_out # INEX2
1671 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1672
1673# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1674# frestore it.
# fu_snan: rewrite the frame as an SNAN exception and exit via _real_snan().
# NOTE(review): tbl_fu_out has no entry pointing here; confirm whether this
# label is reached from elsewhere in the file.
1675fu_snan:
1676 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1677 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1679
1680 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1681 mov.w &0xe006,2+FP_SRC(%a6) # set fsave exc status
1682
1683 frestore FP_SRC(%a6) # restore src op
1684
1685 unlk %a6
1686
1687
1688 bra.l _real_snan
1689
# fu_operr: enabled OPERR on an fmove out (dispatched through tbl_fu_out).
# The src op is still in FP_SRC, so only the vector offset and the fsave
# status word are rewritten before exiting via _real_operr().
1690fu_operr:
1691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1694
1695 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1696 mov.w &0xe004,2+FP_SRC(%a6) # set fsave exc status
1697
1698 frestore FP_SRC(%a6) # restore src op
1699
1700 unlk %a6
1701
1702
1703 bra.l _real_operr
1704
# fu_ovfl: enabled OVFL on an fmove out (dispatched through tbl_fu_out).
# Saves the EXOP from fp1 into FP_SRC, rewrites the vector offset and
# fsave status word, then exits via _real_ovfl().
1705fu_ovfl:
1706 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1707
1708 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1709 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1711
1712 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1713 mov.w &0xe005,2+FP_SRC(%a6) # set fsave exc status
1714
1715 frestore FP_SRC(%a6) # restore EXOP
1716
1717 unlk %a6
1718
1719 bra.l _real_ovfl
1720
1721# underflow can happen for extended precision. extended precision opclass
1722# three instruction exceptions don't update the stack pointer. so, if the
1723# exception occurred from user mode, then simply update a7 and exit normally.
1724# if the exception occurred from supervisor mode, check if the <ea> mode
# was -(sp); that case is handled separately at fu_unfl_s.
1725fu_unfl:
1726 mov.l EXC_A6(%a6),(%a6) # restore a6
1727
1728 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1729 bne.w fu_unfl_s # supervisor
1730
1731 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1732 mov.l %a0,%usp # to or not...
1733
1734fu_unfl_cont:
1735 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1736
1737 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1738 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1740
1741 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1742 mov.w &0xe003,2+FP_SRC(%a6) # set fsave exc status
1743
1744 frestore FP_SRC(%a6) # restore EXOP
1745
1746 unlk %a6
1747
1748 bra.l _real_unfl
1749
1750fu_unfl_s:
1751 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752 bne.b fu_unfl_cont # no; take the normal path
1753
1754# the extended precision result is still in fp0. but, we need to save it
1755# somewhere on the stack until we can copy it to its final resting place
1756# (where the exc frame is currently). make sure it's not at the top of the
1757# frame or it will get overwritten when the exc stack frame is shifted "down".
1758 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1759 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1760
1761 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1762 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1764
1765 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1766 mov.w &0xe003,2+FP_DST(%a6) # set fsave exc status
1767
1768 frestore FP_DST(%a6) # restore EXOP
1769
1770 mov.l (%a6),%a6 # restore frame pointer
1771
# shift the SR/PC/EA exception frame down by 0xc bytes
1772 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1775
1776# now, copy the result to the proper place on the stack
1777 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780
1781 add.l &LOCAL_SIZE-0x8,%sp # drop the local frame; sp -> shifted exc frame
1782
1783 bra.l _real_unfl
1784
1785# fmove in and out enter here.
# fu_inex: save the EXOP from fp1 into FP_SRC, rewrite the frame as an
# inexact exception and exit via _real_inex().
1786fu_inex:
1787 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1788
1789 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1790 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1792
1793 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1794 mov.w &0xe001,2+FP_SRC(%a6) # set fsave exc status
1795
1796 frestore FP_SRC(%a6) # restore EXOP
1797
1798 unlk %a6
1799
1800
1801 bra.l _real_inex
1802
1803#########################################################################
1804#########################################################################
# fu_in_pack: emulate an opclass two instruction with a packed source operand.
# Fetches and tags the source, loads and tags the destination register for
# dyadic operations (ftst is treated as monadic), then dispatches to the
# emulation routine selected by the 7-bit extension field via tbl_unsupp.
1805fu_in_pack:
1806
1807
1808# I'm not sure at this point what FPSR bits are valid for this instruction.
1809# so, since the emulation routines re-create them anyways, zero exception field
1810 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field (mask 0x00ff00ff also clears the top byte)
1811
1812 fmov.l &0x0,%fpcr # zero current control regs
1813 fmov.l &0x0,%fpsr
1814
1815 bsr.l get_packed # fetch packed src operand
1816
1817 lea FP_SRC(%a6),%a0 # pass ptr to src
1818 bsr.l set_tag_x # set src optype tag
1819
1820 mov.b %d0,STAG(%a6) # save src optype tag
1821
1822 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823
1824# bit five of the fp extension word separates the monadic and dyadic operations
1825# at this point
1826 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1827 beq.b fu_extract_p # monadic
1828 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1829 beq.b fu_extract_p # yes, so it's monadic, too
1830
1831 bsr.l load_fpn2 # load dst into FP_DST
1832
1833 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1834 bsr.l set_tag_x # tag the operand type
1835 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1836 bne.b fu_op2_done_p # no
1837 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1838fu_op2_done_p:
1839 mov.b %d0,DTAG(%a6) # save dst optype tag
1840
1841fu_extract_p:
1842 clr.l %d0
1843 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1844
1845 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846
1847 lea FP_SRC(%a6),%a0
1848 lea FP_DST(%a6),%a1
1849
1850 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851 jsr (tbl_unsupp.l,%pc,%d1.l*1) # table entry is used as an offset from tbl_unsupp
1852
1853#
1854# Exceptions in order of precedence:
1855# BSUN : none
1856# SNAN : all dyadic ops
1857# OPERR : fsqrt(-NORM)
1858# OVFL : all except ftst,fcmp
1859# UNFL : all except ftst,fcmp
1860# DZ : fdiv
1861# INEX2 : all except ftst,fcmp
1862# INEX1 : all
1863#
1864
# Post-emulation path for packed opclass two instructions: if any exception
# is enabled go to fu_in_ena_p; otherwise store the result (except for
# fcmp/ftst), update the user stack pointer when the exception came from user
# mode, restore the saved registers and exit (through fu_trace_p if tracing).
1865# we determine the highest priority exception(if any) set by the
1866# emulation routine that has also been enabled by the user.
1867 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1868 bne.w fu_in_ena_p # some are enabled
1869
1870fu_in_cont_p:
1871# fcmp and ftst do not store any result.
1872 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1873 andi.b &0x38,%d0 # extract bits 3-5
1874 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1875 beq.b fu_in_exit_p # yes
1876
1877 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno for store_fpreg
1878 bsr.l store_fpreg # store the result
1879
1880fu_in_exit_p:
1881
1882 btst &0x5,EXC_SR(%a6) # user or supervisor?
1883 bne.w fu_in_exit_s_p # supervisor
1884
1885 mov.l EXC_A7(%a6),%a0 # update user a7
1886 mov.l %a0,%usp
1887
1888fu_in_exit_cont_p:
1889 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1890 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1892
1893 unlk %a6 # unravel stack frame
1894
1895 btst &0x7,(%sp) # is trace on?
1896 bne.w fu_trace_p # yes
1897
1898 bra.l _fpsp_done # exit to os
1899
1900# the exception occurred in supervisor mode. check to see if the
1901# addressing mode was (a7)+. if so, we'll need to shift the
1902# stack frame "up".
# fu_in_exit_s_p: supervisor-mode exit for the no-exception case. When the
# source <ea> was (a7)+ the two longwords holding SR/PC/format-vector are
# copied 12 bytes higher and %sp is advanced by 12 before exiting.
1903fu_in_exit_s_p:
1904 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905 beq.b fu_in_exit_cont_p # no
1906
1907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1910
1911 unlk %a6 # unravel stack frame
1912
1913# shift the stack frame "up". we don't really care about the <ea> field.
1914 mov.l 0x4(%sp),0x10(%sp) # second longword of the frame first ...
1915 mov.l 0x0(%sp),0xc(%sp) # ... then the first longword (starts with SR)
1916 add.l &0xc,%sp # %sp now points at the relocated frame
1917
1918 btst &0x7,(%sp) # is trace on?
1919 bne.w fu_trace_p # yes
1920
1921 bra.l _fpsp_done # exit to os
1922
# fu_in_ena_p: entered with d0 = enabled-exception byte. Masks it with the
# exceptions actually set and branches to fu_in_exc_p if any remain; otherwise
# checks the "overflow set, inexact enabled" special case described below.
1923fu_in_ena_p:
1924 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1925 bfffo %d0{&24:&8},%d0 # find highest priority exception
1926 bne.b fu_in_exc_p # at least one was set
1927
1928#
1929# No exceptions occurred that were also enabled. Now:
1930#
1931# if (OVFL && ovfl_disabled && inexact_enabled) {
1932# branch to _real_inex() (even if the result was exact!);
1933# } else {
1934# save the result in the proper fp reg (unless the op is fcmp or ftst);
1935# return;
1936# }
1937#
1938 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939 beq.w fu_in_cont_p # no
1940
1941fu_in_ovflchk_p:
1942 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943 beq.w fu_in_cont_p # no
1944 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1945
1946#
1947# An exception occurred and that exception was enabled:
1948#
1949# shift enabled exception field into lo byte of d0;
1950# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952# /*
1953# * this is the case where we must call _real_inex() now or else
1954# * there will be no other way to pass it the exceptional operand
1955# */
1956# call _real_inex();
1957# } else {
1958# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959# }
1960#
# fu_in_exc_p: d0 holds the bfffo bit offset (24-31) of the highest priority
# enabled exception. Convert it to a table index; if the exception is inexact
# (index 6 or 7) and a disabled underflow/overflow also occurred, select the
# underflow/overflow frame instead.
1961fu_in_exc_p:
1962 subi.l &24,%d0 # fix offset to be 0-7 (a set bit was found)
1963 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1964 blt.b fu_in_exc_exit_p # no
1965
1966# the enabled exception was inexact
1967 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968 bne.w fu_in_exc_unfl_p # yes
1969 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970 bne.w fu_in_exc_ovfl_p # yes
1971
1972# here, we insert the correct fsave status value into the fsave frame for the
1973# corresponding exception. the operand in the fsave frame should be the original
1974# src operand.
1975# as a reminder for future predicted pain and agony, we are passing in fsave the
1976# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
# fu_in_exc_exit_p: d0 = exception index (0-7). Writes the matching status
# word from tbl_except_p into the FP_SRC fsave image, restores the saved
# registers, frestore's the image and exits (through fu_trace_p if tracing).
1978fu_in_exc_exit_p:
1979 btst &0x5,EXC_SR(%a6) # user or supervisor?
1980 bne.w fu_in_exc_exit_s_p # supervisor
1981
1982 mov.l EXC_A7(%a6),%a0 # update user a7
1983 mov.l %a0,%usp
1984
1985fu_in_exc_exit_cont_p:
1986 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception d0
1987
1988 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1989 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1991
1992 frestore FP_SRC(%a6) # restore src op
1993
1994 unlk %a6
1995
1996 btst &0x7,(%sp) # is trace enabled?
1997 bne.w fu_trace_p # yes
1998
1999 bra.l _fpsp_done
2000
# fsave status words indexed by exception number 0-7, in the precedence order
# listed earlier: BSUN,SNAN,OPERR,OVFL / UNFL,DZ,INEX2,INEX1.
2001tbl_except_p:
2002 short 0xe000,0xe006,0xe004,0xe005
2003 short 0xe003,0xe002,0xe001,0xe001
2004
# force table index 3 (the OVFL slot) and rejoin the common exit
2005fu_in_exc_ovfl_p:
2006 mov.w &0x3,%d0
2007 bra.w fu_in_exc_exit_p
2008
# force table index 4 (the UNFL slot) and rejoin the common exit
2009fu_in_exc_unfl_p:
2010 mov.w &0x4,%d0
2011 bra.w fu_in_exc_exit_p
2012
# fu_in_exc_exit_s_p: supervisor-mode variant of the enabled-exception exit.
# If the <ea> was not (a7)+ it rejoins fu_in_exc_exit_cont_p; otherwise it
# does the same restore/frestore sequence and then shifts the exception frame
# up by 12 bytes before exiting.
2013fu_in_exc_exit_s_p:
2014 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+ ?
2015 beq.b fu_in_exc_exit_cont_p # no
2016
2017 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception d0
2018
2019 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2020 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2022
2023 frestore FP_SRC(%a6) # restore src op
2024
2025 unlk %a6 # unravel stack frame
2026
2027# shift stack frame "up". who cares about <ea> field.
2028 mov.l 0x4(%sp),0x10(%sp)
2029 mov.l 0x0(%sp),0xc(%sp)
2030 add.l &0xc,%sp
2031
2032 btst &0x7,(%sp) # is trace on?
2033 bne.b fu_trace_p # yes
2034
2035 bra.l _fpsp_done # exit to os
2036
2037#
2038# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040# trace stack frame then jump to _real_trace().
2041#
2042# UNSUPP FRAME TRACE FRAME
2043# ***************** *****************
2044# * EA * * Current *
2045# * * * PC *
2046# ***************** *****************
2047# * 0x2 * 0x0dc * * 0x2 * 0x024 *
2048# ***************** *****************
2049# * Next * * Next *
2050# * PC * * PC *
2051# ***************** *****************
2052# * SR * * SR *
2053# ***************** *****************
# fu_trace_p: convert the frame at (%sp) into a trace frame as drawn above:
# overwrite the format/vector word with 0x2024 and the longword at offset 8
# with the FPIAR, then jump to _real_trace().
2054fu_trace_p:
2055 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2056 fmov.l %fpiar,0x8(%sp) # "Current PC" slot <- FPIAR
2057
2058 bra.l _real_trace
2059
2060#########################################################
2061#########################################################
# fu_out_pack: emulate a packed-format fmove out. Loads the source register
# into FP_SRC, tags it (fixing up UNNORMs), and calls fout with d0 = rounding
# mode/precision and a0 = pointer to the source operand.
2062fu_out_pack:
2063
2064
2065# I'm not sure at this point what FPSR bits are valid for this instruction.
2066# so, since the emulation routines re-create them anyways, zero exception field.
2067# fmove out doesn't affect ccodes.
2068 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2069
2070 fmov.l &0x0,%fpcr # zero current control regs
2071 fmov.l &0x0,%fpsr
2072
2073 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract 3-bit register field for load_fpn1
2074 bsr.l load_fpn1
2075
2076# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077# able to detect all operand types.
2078 lea FP_SRC(%a6),%a0
2079 bsr.l set_tag_x # tag the operand type
2080 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2081 bne.b fu_op2_p # no
2082 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2083
2084fu_op2_p:
2085 mov.b %d0,STAG(%a6) # save src optype tag
2086
2087 clr.l %d0
2088 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2089
2090 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2091
2092 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2093 bsr.l fout # call fmove out routine
2094
2095# Exceptions in order of precedence:
2096# BSUN : no
2097# SNAN : yes
2098# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099# OVFL : no
2100# UNFL : no
2101# DZ : no
2102# INEX2 : yes
2103# INEX1 : no
2104
# Post-fout path: if any exception is enabled go to fu_out_ena_p; otherwise
# write EXC_A6 back to the saved-a6 slot, update the user stack pointer for
# user mode, restore the saved registers and exit (through fu_trace_p if
# tracing).
2105# determine the highest priority exception(if any) set by the
2106# emulation routine that has also been enabled by the user.
2107 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2108 bne.w fu_out_ena_p # some are enabled
2109
2110fu_out_exit_p:
2111 mov.l EXC_A6(%a6),(%a6) # restore a6
2112
2113 btst &0x5,EXC_SR(%a6) # user or supervisor?
2114 bne.b fu_out_exit_s_p # supervisor
2115
2116 mov.l EXC_A7(%a6),%a0 # update user a7
2117 mov.l %a0,%usp
2118
2119fu_out_exit_cont_p:
2120 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2121 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2123
2124 unlk %a6 # unravel stack frame
2125
2126 btst &0x7,(%sp) # is trace on?
2127 bne.w fu_trace_p # yes
2128
2129 bra.l _fpsp_done # exit to os
2130
2131# the exception occurred in supervisor mode. check to see if the
2132# addressing mode was -(a7). if so, we'll need to shift the
2133# stack frame "down".
# fu_out_exit_s_p: supervisor-mode exit for packed fmove out. For -(a7) the
# SR/PC/format-vector longwords are copied 12 bytes lower, the 12-byte result
# held in FP_DST is copied to where the frame used to start, and %sp is set to
# the relocated frame. %a6 is reloaded by hand (no unlk) so %sp still addresses
# the local area during the copies.
2134fu_out_exit_s_p:
2135 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136 beq.b fu_out_exit_cont_p # no
2137
2138 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2139 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2141
2142 mov.l (%a6),%a6 # restore frame pointer
2143
2144 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146
2147# now, copy the result to the proper place on the stack
2148 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151
2152 add.l &LOCAL_SIZE-0x8,%sp # drop locals; %sp -> relocated frame
2153
2154 btst &0x7,(%sp) # is trace on?
2155 bne.w fu_trace_p # yes
2156
2157 bra.l _fpsp_done # exit to os
2158
# fu_out_ena_p: entered with d0 = enabled-exception byte. If no enabled
# exception is set, take the normal exit. Otherwise d0 holds the bfffo bit
# offset and is compared against 0x1a: greater -> fu_inex_p2, equal ->
# fu_operr_p, less -> falls through to the SNAN handling that follows.
2159fu_out_ena_p:
2160 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2161 bfffo %d0{&24:&8},%d0 # find highest priority exception
2162 beq.w fu_out_exit_p # none enabled and set
2163
2164 mov.l EXC_A6(%a6),(%a6) # restore a6
2165
2166# an exception occurred and that exception was enabled.
2167# the only exceptions possible on packed move out are INEX, OPERR, and SNAN.
2168fu_out_exc_p:
2169 cmpi.b %d0,&0x1a
2170 bgt.w fu_inex_p2
2171 beq.w fu_operr_p
2172
# fu_snan_p: enabled SNAN on packed fmove out. User mode updates the usp and
# uses the common fu_snan path; supervisor mode does the same unless the <ea>
# was -(a7), in which case the frame is moved down 12 bytes here and the
# default result is stored where the frame used to be.
2173fu_snan_p:
2174 btst &0x5,EXC_SR(%a6) # user or supervisor?
2175 bne.b fu_snan_s_p # supervisor
2176
2177 mov.l EXC_A7(%a6),%a0 # update user a7
2178 mov.l %a0,%usp
2179 bra.w fu_snan
2180
2181fu_snan_s_p:
2182 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2183 bne.w fu_snan # no
2184
2185# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186# the strategy is to move the exception frame "down" 12 bytes. then, we
2187# can store the default result where the exception frame was.
2188 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2189 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2191
2192 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2193 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2194
2195 frestore FP_SRC(%a6) # restore src operand
2196
2197 mov.l (%a6),%a6 # restore frame pointer
2198
2199 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202
2203# now, we copy the default result to its proper location
2204 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207
2208 add.l &LOCAL_SIZE-0x8,%sp # drop locals; %sp -> relocated frame
2209
2210
2211 bra.l _real_snan
2212
# fu_operr_p: enabled OPERR on packed fmove out. User mode updates the usp and
# uses the common fu_operr path; supervisor mode does the same unless the <ea>
# was -(a7), in which case the frame is moved down 12 bytes here and the
# default result is stored where the frame used to be.
2213fu_operr_p:
2214 btst &0x5,EXC_SR(%a6) # user or supervisor?
2215 bne.w fu_operr_p_s # supervisor
2216
2217 mov.l EXC_A7(%a6),%a0 # update user a7
2218 mov.l %a0,%usp
2219 bra.w fu_operr
2220
2221fu_operr_p_s:
2222 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2223 bne.w fu_operr # no
2224
2225# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226# the strategy is to move the exception frame "down" 12 bytes. then, we
2227# can store the default result where the exception frame was.
2228 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2229 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2231
2232 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2233 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2234
2235 frestore FP_SRC(%a6) # restore src operand
2236
2237 mov.l (%a6),%a6 # restore frame pointer
2238
2239 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242
2243# now, we copy the default result to its proper location
2244 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247
2248 add.l &LOCAL_SIZE-0x8,%sp # drop locals; %sp -> relocated frame
2249
2250
2251 bra.l _real_operr
2252
# fu_inex_p2: enabled inexact on packed fmove out. User mode updates the usp
# and uses the common fu_inex path; supervisor mode does the same unless the
# <ea> was -(a7), in which case the frame is moved down 12 bytes here and the
# default result is stored where the frame used to be.
2253fu_inex_p2:
2254 btst &0x5,EXC_SR(%a6) # user or supervisor?
2255 bne.w fu_inex_s_p2 # supervisor
2256
2257 mov.l EXC_A7(%a6),%a0 # update user a7
2258 mov.l %a0,%usp
2259 bra.w fu_inex
2260
2261fu_inex_s_p2:
2262 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2263 bne.w fu_inex # no
2264
2265# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266# the strategy is to move the exception frame "down" 12 bytes. then, we
2267# can store the default result where the exception frame was.
2268 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2269 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2271
2272 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2273 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2274
2275 frestore FP_SRC(%a6) # restore src operand
2276
2277 mov.l (%a6),%a6 # restore frame pointer
2278
2279 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282
2283# now, we copy the default result to its proper location
2284 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287
2288 add.l &LOCAL_SIZE-0x8,%sp # drop locals; %sp -> relocated frame
2289
2290
2291 bra.l _real_inex
2292
2293#########################################################################
2294
2295#
2296# if we're stuffing a source operand back into an fsave frame then we
2297# have to make sure that for single or double source operands that the
2298# format stuffed is as weird as the hardware usually makes it.
2299#
# funimp_skew: re-skew the FP_SRC operand when the source specifier in
# EXC_EXTWORD is single (1) or double (5); any other specifier returns at once.
# Uses d0/d1 (and a0 plus whatever dnrm_lp uses in the double case).
2300 global funimp_skew
2301funimp_skew:
2302 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303 cmpi.b %d0,&0x1 # was src sgl?
2304 beq.b funimp_skew_sgl # yes
2305 cmpi.b %d0,&0x5 # was src dbl?
2306 beq.b funimp_skew_dbl # yes
2307 rts
2308
# single: only operands with a non-zero exponent <= 0x3f80 are adjusted. The
# hi mantissa is shifted right by (0x3f81 - exponent), the j-bit is set and
# the exponent is replaced by 0x3f80 with the sign bit preserved.
2309funimp_skew_sgl:
2310 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2311 andi.w &0x7fff,%d0 # strip sign
2312 beq.b funimp_skew_sgl_not # zero exponent: leave operand alone
2313 cmpi.w %d0,&0x3f80
2314 bgt.b funimp_skew_sgl_not # exponent above 0x3f80: leave operand alone
2315 neg.w %d0 # make exponent negative
2316 addi.w &0x3f81,%d0 # find amt to shift
2317 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2318 lsr.l %d0,%d1 # shift it
2319 bset &31,%d1 # set j-bit
2320 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2321 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2322 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2323funimp_skew_sgl_not:
2324 rts
2325
# double: only operands with a non-zero exponent <= 0x3c00 are adjusted. The
# sign is parked in the byte at 0x2+FP_SRC, dnrm_lp is called with threshold
# 0x3c01, then the exponent is set to 0x3c00 (sign re-applied) and the j-bit
# is set.
2326funimp_skew_dbl:
2327 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2328 andi.w &0x7fff,%d0 # strip sign
2329 beq.b funimp_skew_dbl_not # zero exponent: leave operand alone
2330 cmpi.w %d0,&0x3c00
2331 bgt.b funimp_skew_dbl_not # exponent above 0x3c00: leave operand alone
2332
2333 tst.b FP_SRC_EX(%a6) # make "internal format"
2334 smi.b 0x2+FP_SRC(%a6) # sign flag byte: 0xff if negative, else 0x00
2335 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2336 clr.l %d0 # clear g,r,s
2337 lea FP_SRC(%a6),%a0 # pass ptr to src op
2338 mov.w &0x3c01,%d1 # pass denorm threshold
2339 bsr.l dnrm_lp # denorm it
2340 mov.w &0x3c00,%d0 # new exponent
2341 tst.b 0x2+FP_SRC(%a6) # is sign set?
2342 beq.b fss_dbl_denorm_done # no
2343 bset &15,%d0 # set sign
2344fss_dbl_denorm_done:
2345 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2346 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2347funimp_skew_dbl_not:
2348 rts
2349
2350#########################################################################
# _mem_write2: if the exception came from user mode, branch to _dmem_write.
# For supervisor mode, copy three longwords from (a0) into
# FP_DST_EX/HI/LO instead, clear d1 and return.
# NOTE(review): d1 = 0 presumably mirrors the "no failure" status that callers
# test after the memory callouts -- confirm against _dmem_write's contract.
2351 global _mem_write2
2352_mem_write2:
2353 btst &0x5,EXC_SR(%a6) # user or supervisor?
2354 beq.l _dmem_write # user: let _dmem_write handle it
2355 mov.l 0x0(%a0),FP_DST_EX(%a6)
2356 mov.l 0x4(%a0),FP_DST_HI(%a6)
2357 mov.l 0x8(%a0),FP_DST_LO(%a6)
2358 clr.l %d1
2359 rts
2360
2361#########################################################################
2362# XDEF **************************************************************** #
2363# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2364# effective address" exception. #
2365# #
2366# This handler should be the first code executed upon taking the #
2367# FP Unimplemented Effective Address exception in an operating #
2368# system. #
2369# #
2370# XREF **************************************************************** #
2371# _imem_read_long() - read instruction longword #
2372# fix_skewed_ops() - adjust src operand in fsave frame #
2373# set_tag_x() - determine optype of src/dst operands #
2374# store_fpreg() - store opclass 0 or 2 result to FP regfile #
2375# unnorm_fix() - change UNNORM operands to NORM or ZERO #
2376# load_fpn2() - load dst operand from FP regfile #
2377# tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2378# decbin() - convert packed data to FP binary data #
2379# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2380# _real_access() - "callout" for access error exception #
2381# _mem_read() - read extended immediate operand from memory #
2382# _fpsp_done() - "callout" for exit; work all done #
2383# _real_trace() - "callout" for Trace enabled exception #
2384# fmovm_dynamic() - emulate dynamic fmovm instruction #
2385# fmovm_ctrl() - emulate fmovm control instruction #
2386# #
2387# INPUT *************************************************************** #
2388# - The system stack contains the "Unimplemented <ea>" stk frame #
2389# #
2390# OUTPUT ************************************************************** #
2391# If access error: #
2392# - The system stack is changed to an access error stack frame #
2393# If FPU disabled: #
2394# - The system stack is changed to an FPU disabled stack frame #
2395# If Trace exception enabled: #
2396# - The system stack is changed to a Trace exception stack frame #
2397# Else: (normal case) #
2398# - None (correct result has been stored as appropriate) #
2399# #
2400# ALGORITHM *********************************************************** #
2401# This exception handles 3 types of operations: #
2402# (1) FP Instructions using extended precision or packed immediate #
2403# addressing mode. #
2404# (2) The "fmovm.x" instruction w/ dynamic register specification. #
2405# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2406# #
2407# For immediate data operations, the data is read in w/ a #
2408# _mem_read() "callout", converted to FP binary (if packed), and used #
2409# as the source operand to the instruction specified by the instruction #
2410# word. If no FP exception should be reported as a result of the #
2411# emulation, then the result is stored to the destination register and #
2412# the handler exits through _fpsp_done(). If an enabled exc has been #
2413# signalled as a result of emulation, then an fsave state frame #
2414# corresponding to the FP exception type must be entered into the 060 #
2415# FPU before exiting. In either the enabled or disabled cases, we #
2416# must also check if a Trace exception is pending, in which case, we #
2417# must create a Trace exception stack frame from the current exception #
2418# stack frame. If no Trace is pending, we simply exit through #
2419# _fpsp_done(). #
2420# For "fmovm.x", call the routine fmovm_dynamic() which will #
2421# decode and emulate the instruction. No FP exceptions can be pending #
2422# as a result of this operation emulation. A Trace exception can be #
2423# pending, though, which means the current stack frame must be changed #
2424# to a Trace stack frame and an exit made through _real_trace(). #
2425# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2426# was executed from supervisor mode, this handler must store the FP #
2427# register file values to the system stack by itself since #
2428# fmovm_dynamic() can't handle this. A normal exit is made through #
2429# _fpsp_done(). #
2430# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2431# Again, a Trace exception may be pending and an exit made through #
2432# _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2433# #
2434# Before any of the above is attempted, it must be checked to #
2435# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2436# before the "FPU disabled" exception, but the "FPU disabled" exception #
2437# has higher priority, we check the disabled bit in the PCR. If set, #
2438# then we must create an 8 word "FPU disabled" exception stack frame #
2439# from the current 4 word exception stack frame. This includes #
2440# reproducing the effective address of the instruction to put on the #
2441# new stack frame. #
2442# #
2443# In the process of all emulation work, if a _mem_read() #
2444# "callout" returns a failing result indicating an access error, then #
2445# we must create an access error stack frame from the current stack #
2446# frame. This information includes a faulting address and a fault- #
2447# status-longword. These are created within this handler. #
2448# #
2449#########################################################################
2450
# _fpsp_effadd entry: check the PCR for a disabled FPU (d0 is saved on the
# stack around the test), build the local stack frame, save d0-d1/a0-a1, the
# FP control registers and fp0-fp1, then fetch the faulting instruction's
# opword/extension word into EXC_OPWORD.
2451 global _fpsp_effadd
2452_fpsp_effadd:
2453
2454# This exception type takes priority over the "Line F Emulator"
2455# exception. Therefore, the FPU could be disabled when entering here.
2456# So, we must check to see if it's disabled and handle that case separately.
2457 mov.l %d0,-(%sp) # save d0
2458 movc %pcr,%d0 # load proc cr
2459 btst &0x1,%d0 # is FPU disabled?
2460 bne.w iea_disabled # yes
2461 mov.l (%sp)+,%d0 # restore d0
2462
2463 link %a6,&-LOCAL_SIZE # init stack frame
2464
2465 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2466 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2468
2469# PC of instruction that took the exception is the PC in the frame
2470 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471
2472 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2473 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2474 bsr.l _imem_read_long # fetch the instruction words
2475 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2476
2477#########################################################################
2478
2479 tst.w %d0 # is operation fmovem?
2480 bmi.w iea_fmovm # yes
2481
2482#
2483# here, we will have:
2484# fabs fdabs fsabs facos fmod
2485# fadd fdadd fsadd fasin frem
2486# fcmp fatan fscale
2487# fdiv fddiv fsdiv fatanh fsin
2488# fint fcos fsincos
2489# fintrz fcosh fsinh
2490# fmove fdmove fsmove fetox ftan
2491# fmul fdmul fsmul fetoxm1 ftanh
2492# fneg fdneg fsneg fgetexp ftentox
2493# fsgldiv fgetman ftwotox
2494# fsglmul flog10
2495# fsqrt flog2
2496# fsub fdsub fssub flogn
2497# ftst flognp1
2498# which can all use f<op>.{x,p}
2499# so, now it's immediate data extended precision AND PACKED FORMAT!
2500#
# iea_op: read the 12-byte immediate source operand into FP_SRC. Bit 0xa of
# the fetched instruction words (in d0) selects extended vs packed. A packed
# operand that is neither INF/NAN nor zero is converted with decbin and the
# converted value is stored back into FP_SRC. A failed read (d1 != 0)
# branches to iea_iacc.
2501iea_op:
2502 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear top byte and exception byte of saved FPSR
2503
2504 btst &0xa,%d0 # is src fmt x or p?
2505 bne.b iea_op_pack # packed
2506
2507
2508 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2509 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2510 mov.l &0xc,%d0 # pass: 12 bytes
2511 bsr.l _imem_read # read extended immediate
2512
2513 tst.l %d1 # did ifetch fail?
2514 bne.w iea_iacc # yes
2515
2516 bra.b iea_op_setsrc
2517
2518iea_op_pack:
2519
2520 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2521 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2522 mov.l &0xc,%d0 # pass: 12 bytes
2523 bsr.l _imem_read # read packed operand
2524
2525 tst.l %d1 # did ifetch fail?
2526 bne.w iea_iacc # yes
2527
2528# The packed operand is an INF or a NAN if the exponent field is all ones.
2529 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2530 cmpi.w %d0,&0x7fff # INF or NAN?
2531 beq.b iea_op_setsrc # operand is an INF or NAN
2532
2533# The packed operand is a zero if the mantissa is all zero, else it's
2534# a normal packed op.
2535 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2536 andi.b &0x0f,%d0 # clear all but last nybble
2537 bne.b iea_op_gp_not_spec # not a zero
2538 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2539 bne.b iea_op_gp_not_spec # not a zero
2540 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2541 beq.b iea_op_setsrc # operand is a ZERO
2542iea_op_gp_not_spec:
2543 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2544 bsr.l decbin # convert to extended
2545 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2546
# iea_op_setsrc: advance the extension word pointer past the 12-byte
# immediate, tag the source (fixing UNNORMs), decide whether a destination
# operand must be loaded (dyadic ops and fcmp) and whether a result is stored
# (STORE_FLG is set non-zero for ftst/fcmp), then dispatch through tbl_unsupp.
2547iea_op_setsrc:
2548 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2549
2550# FP_SRC now holds the src operand.
2551 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2552 bsr.l set_tag_x # tag the operand type
2553 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2554 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2555 bne.b iea_op_getdst # no
2556 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2557 mov.b %d0,STAG(%a6) # set new optype tag
2558iea_op_getdst:
2559 clr.b STORE_FLG(%a6) # clear "store result" boolean
2560
2561 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2562 beq.b iea_op_extract # monadic
2563 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2564 bne.b iea_op_spec # yes
2565
2566iea_op_loaddst:
2567 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568 bsr.l load_fpn2 # load dst operand
2569
2570 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2571 bsr.l set_tag_x # tag the operand type
2572 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2573 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2574 bne.b iea_op_extract # no
2575 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2576 mov.b %d0,DTAG(%a6) # set new optype tag
2577 bra.b iea_op_extract
2578
2579# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580iea_op_spec:
2581 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2582 beq.b iea_op_extract # yes
2583# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584# store a result. then, only fcmp will branch back and pick up a dst operand.
2585 st STORE_FLG(%a6) # don't store a final result
2586 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2587 beq.b iea_op_loaddst # yes
2588
2589iea_op_extract:
2590 clr.l %d0
2591 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2592
2593 mov.b 1+EXC_CMDREG(%a6),%d1
2594 andi.w &0x007f,%d1 # extract extension
2595
2596 fmov.l &0x0,%fpcr # zero current control regs
2597 fmov.l &0x0,%fpsr
2598
2599 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2600 lea FP_DST(%a6),%a1 # pass: ptr to dst op
2601
2602 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603 jsr (tbl_unsupp.l,%pc,%d1.l*1) # table entry is used as an offset from tbl_unsupp
2604
2605#
2606# Exceptions in order of precedence:
2607# BSUN : none
2608# SNAN : all operations
2609# OPERR : all reg-reg or mem-reg operations that can normally operr
2610# OVFL : same as OPERR
2611# UNFL : same as OPERR
2612# DZ : same as OPERR
2613# INEX2 : same as OPERR
2614# INEX1 : all packed immediate operations
2615#
2616
# Post-emulation path with no frestore: if any exception is enabled go to
# iea_op_ena; otherwise store the result unless STORE_FLG is set, put the
# faulting instruction's PC in the saved FPIAR and the next PC in the
# exception frame, restore registers and exit (iea_op_trace if tracing).
2617# we determine the highest priority exception(if any) set by the
2618# emulation routine that has also been enabled by the user.
2619 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2620 bne.b iea_op_ena # some are enabled
2621
2622# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623# these don't save results.
2624iea_op_save:
2625 tst.b STORE_FLG(%a6) # does this op store a result?
2626 bne.b iea_op_exit1 # exit with no frestore
2627
2628iea_op_store:
2629 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630 bsr.l store_fpreg # store the result
2631
2632iea_op_exit1:
2633 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635
2636 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2637 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2639
2640 unlk %a6 # unravel the frame
2641
2642 btst &0x7,(%sp) # is trace on?
2643 bne.w iea_op_trace # yes
2644
2645 bra.l _fpsp_done # exit to os
2646
# iea_op_ena: entered with d0 = enabled-exception byte. Masks it with the
# exceptions actually set; if one remains go to iea_op_exc. Otherwise, when
# overflow is set and inexact is enabled, force an overflow frame; else store
# the result through the normal path.
2647iea_op_ena:
2648 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2649 bfffo %d0{&24:&8},%d0 # find highest priority exception
2650 bne.b iea_op_exc # at least one was set
2651
2652# no exception occurred. now, did a disabled, exact overflow occur with inexact
2653# enabled? if so, then we have to stuff an overflow frame into the FPU.
2654 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655 beq.b iea_op_save # no
2656
2657iea_op_ovfl:
2658 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659 beq.b iea_op_store # no
2660 bra.b iea_op_exc_ovfl # yes
2661
2662# an enabled exception occurred. we have to insert the exception type back into
2663# the machine.
# iea_op_exc: d0 holds the bfffo bit offset (24-31) of the highest priority
# enabled exception. Pick the fsave status word for it (substituting the
# overflow/underflow word when index 6 coincides with a set overflow/underflow
# bit), then exit through iea_op_exit2, which frestore's the FP_SRC image.
# NOTE(review): the INEX test here is "bne" against 6 only, whereas the
# packed-source handler fu_in_exc_p uses "blt" so that 6 and 7 both qualify;
# confirm that index 7 (INEX1) is meant to skip the ovfl/unfl check here.
2664iea_op_exc:
2665 subi.l &24,%d0 # fix offset to be 0-7 (a set bit was found)
2666 cmpi.b %d0,&0x6 # is exception INEX?
2667 bne.b iea_op_exc_force # no
2668
2669# the enabled exception was inexact. so, if it occurs with an overflow
2670# or underflow that was disabled, then we have to force an overflow or
2671# underflow frame.
2672 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673 bne.b iea_op_exc_ovfl # yes
2674 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675 bne.b iea_op_exc_unfl # yes
2676
2677iea_op_exc_force:
2678 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception d0
2679 bra.b iea_op_exit2 # exit with frestore
2680
# fsave status words indexed by exception number 0-7.
# NOTE(review): slot 0 holds 0xe002 here but 0xe000 in tbl_except_p; the
# precedence list above says BSUN is "none", so slot 0 is presumably never
# selected -- confirm.
2681tbl_iea_except:
2682 short 0xe002, 0xe006, 0xe004, 0xe005
2683 short 0xe003, 0xe002, 0xe001, 0xe001
2684
2685iea_op_exc_ovfl:
2686 mov.w &0xe005,2+FP_SRC(%a6) # same word as table index 3
2687 bra.b iea_op_exit2
2688
2689iea_op_exc_unfl:
2690 mov.w &0xe003,2+FP_SRC(%a6) # same word as table index 4
2691
2692iea_op_exit2:
2693 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695
2696 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2697 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2699
2700 frestore FP_SRC(%a6) # restore exceptional state
2701
2702 unlk %a6 # unravel the frame
2703
2704 btst &0x7,(%sp) # is trace on?
2705 bne.b iea_op_trace # yes
2706
2707 bra.l _fpsp_done # exit to os
2708
2709#
2710# The opclass two instruction that took an "Unimplemented Effective Address"
2711# exception was being traced. Make the "current" PC the FPIAR and put it in
2712# the trace stack frame then jump to _real_trace().
2713#
2714# UNIMP EA FRAME TRACE FRAME
2715# ***************** *****************
2716# * 0x0 * 0x0f0 * * Current *
2717# ***************** * PC *
2718# * Current * *****************
2719# * PC * * 0x2 * 0x024 *
2720# ***************** *****************
2721# * SR * * Next *
2722# ***************** * PC *
2723# *****************
2724# * SR *
2725# *****************
# iea_op_trace: grow the exception frame at (%sp) by one longword and
# rewrite it as a trace frame (format 0x2, vector offset 0x024) whose
# extra longword is the "Current PC" taken from FPIAR; exit to _real_trace.
2726iea_op_trace:
2727 mov.l (%sp),-(%sp) # shift stack frame "down"
2728 mov.w 0x8(%sp),0x4(%sp) # move the word that followed the copied longword
2729 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2730 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2731
2732 bra.l _real_trace
2733
2734#########################################################################
# iea_fmovm: dispatch an fmovm. Bit 14 of %d0 clear selects the control
# register form (iea_fmovm_ctrl); otherwise the data register form is
# handled here, split by the user/supervisor bit of the stacked SR.
2735iea_fmovm:
2736 btst &14,%d0 # ctrl or data reg
2737 beq.w iea_fmovm_ctrl
2738
2739iea_fmovm_data:
2740
2741 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2742 bne.b iea_fmovm_data_s
2743
# user mode: a7 is the user stack pointer. pass it to fmovm_dynamic via
# EXC_A7 and write back whatever value is there afterwards.
2744iea_fmovm_data_u:
2745 mov.l %usp,%a0
2746 mov.l %a0,EXC_A7(%a6) # store current a7
2747 bsr.l fmovm_dynamic # do dynamic fmovm
2748 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2749 mov.l %a0,%usp # update usp
2750 bra.w iea_fmovm_exit
2751
# iea_fmovm_data_s: supervisor-mode fmovm. The a7 image handed to
# fmovm_dynamic is the address 2 bytes past EXC_VOFF. Afterwards
# SPCOND_FLG is checked for the -(a7) and (a7)+ special cases; anything
# else takes the normal exit.
2752iea_fmovm_data_s:
2753 clr.b SPCOND_FLG(%a6) # no special case seen yet
2754 lea 0x2+EXC_VOFF(%a6),%a0
2755 mov.l %a0,EXC_A7(%a6)
2756 bsr.l fmovm_dynamic # do dynamic fmovm
2757
2758 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2759 beq.w iea_fmovm_data_predec
2760 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2761 bne.w iea_fmovm_exit
2762
2763# right now, d0 = the size.
2764# the data has been fetched from the supervisor stack, but we have not
2765# incremented the stack pointer by the appropriate number of bytes.
2766# do it here.
# iea_fmovm_data_postinc: (a7)+ from supervisor mode, trace off.
# Rebuild the exception frame %d0 bytes higher (SR, Next PC, voff 0x0f0),
# park the address of the rebuilt frame in EXC_SR(%a6), restore the saved
# registers, and reload %sp before exiting to _fpsp_done.
# NOTE(review): the "mov.l (%sp)+,%sp" after unlk presumably pops the
# longword stored at EXC_SR(%a6) -- confirm against the EXC_SR definition.
2767iea_fmovm_data_postinc:
2768 btst &0x7,EXC_SR(%a6) # is trace on?
2769 bne.b iea_fmovm_data_pi_trace # yes
2770
2771 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR to the new frame
2772 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # new frame PC = EXC_EXTWPTR
2773 mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # new frame voff word
2774
2775 lea (EXC_SR,%a6,%d0),%a0 # address of the rebuilt frame
2776 mov.l %a0,EXC_SR(%a6) # park it for the %sp reload below
2777
2778 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2779 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2781
2782 unlk %a6
2783 mov.l (%sp)+,%sp # switch to the rebuilt frame
2784 bra.l _fpsp_done
2785
# iea_fmovm_data_pi_trace: (a7)+ from supervisor mode with trace on.
# Same as the post-increment path above, but the rebuilt frame is a trace
# frame (voff word 0x2024) placed 4 bytes lower to leave room for the
# extra longword, which is filled from EXC_PC; exits to _real_trace.
2786iea_fmovm_data_pi_trace:
2787 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # copy SR to the new frame
2788 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # new frame PC = EXC_EXTWPTR
2789 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2790 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # extra longword = stacked PC
2791
2792 lea (EXC_SR-0x4,%a6,%d0),%a0 # address of the rebuilt frame
2793 mov.l %a0,EXC_SR(%a6) # park it for the %sp reload below
2794
2795 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2796 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2798
2799 unlk %a6
2800 mov.l (%sp)+,%sp # switch to the rebuilt frame
2801 bra.l _real_trace
2802
2803# right now, d0 = size and d1 = the strg.
# iea_fmovm_data_predec: -(a7) from supervisor mode. The strg byte and the
# size are parked in the two bytes of EXC_VOFF, the saved registers are
# restored, and a6/d0/d1/Next PC are pushed. The exception frame is then
# rebuilt "size" bytes lower (as a trace frame if trace is on), the
# selected FP registers are stored 12 bytes apart above it, and %sp is
# reloaded with the address pushed by the pea below.
2804iea_fmovm_data_predec:
2805 mov.b %d1,EXC_VOFF(%a6) # store strg
2806 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2807
2808 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2809 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2811
2812 mov.l (%a6),-(%sp) # make a copy of a6
2813 mov.l %d0,-(%sp) # save d0
2814 mov.l %d1,-(%sp) # save d1
2815 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2816
2817 clr.l %d0
2818 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2819 neg.l %d0 # get negative of size
2820
2821 btst &0x7,EXC_SR(%a6) # is trace enabled?
2822 beq.b iea_fmovm_data_p2
2823
# trace on: build a trace frame (voff word 0x2024), 4 bytes lower still.
2824 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2826 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) # pop the Next PC copy into the frame
2827 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2828
2829 pea (%a6,%d0) # create final sp
2830 bra.b iea_fmovm_data_p3
2831
# trace off: build a normal frame (voff word 0x00f0).
2832iea_fmovm_data_p2:
2833 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834 mov.l (%sp)+,(EXC_PC,%a6,%d0) # pop the Next PC copy into the frame
2835 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2836
2837 pea (0x4,%a6,%d0) # create final sp
2838
# walk the strg byte from its top bit down. each set bit stores one FP
# register (mask 0x80 first, 0x01 last) and advances %d0 by 0xc bytes.
2839iea_fmovm_data_p3:
2840 clr.l %d1
2841 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2842
2843 tst.b %d1
2844 bpl.b fm_1
2845 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2846 addi.l &0xc,%d0
2847fm_1:
2848 lsl.b &0x1,%d1
2849 bpl.b fm_2
2850 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2851 addi.l &0xc,%d0
2852fm_2:
2853 lsl.b &0x1,%d1
2854 bpl.b fm_3
2855 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2856 addi.l &0xc,%d0
2857fm_3:
2858 lsl.b &0x1,%d1
2859 bpl.b fm_4
2860 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2861 addi.l &0xc,%d0
2862fm_4:
2863 lsl.b &0x1,%d1
2864 bpl.b fm_5
2865 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2866 addi.l &0xc,%d0
2867fm_5:
2868 lsl.b &0x1,%d1
2869 bpl.b fm_6
2870 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2871 addi.l &0xc,%d0
2872fm_6:
2873 lsl.b &0x1,%d1
2874 bpl.b fm_7
2875 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2876 addi.l &0xc,%d0
2877fm_7:
2878 lsl.b &0x1,%d1
2879 bpl.b fm_end
2880 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
# reload the values pushed above, then switch %sp to the rebuilt frame.
2881fm_end:
2882 mov.l 0x4(%sp),%d1
2883 mov.l 0x8(%sp),%d0
2884 mov.l 0xc(%sp),%a6
2885 mov.l (%sp)+,%sp
2886
2887 btst &0x7,(%sp) # is trace enabled?
2888 beq.l _fpsp_done
2889 bra.l _real_trace
2890
2891#########################################################################
# iea_fmovm_ctrl: control register fmovm; fmovm_ctrl does the work and
# execution falls through to the common exit.
2892iea_fmovm_ctrl:
2893
2894 bsr.l fmovm_ctrl # load ctrl regs
2895
# iea_fmovm_exit: common exit for the fmovm paths. restore the saved
# registers, then either build a trace frame (iea_fmovm_trace) or set the
# stacked PC to the Next PC and exit to the OS.
2896iea_fmovm_exit:
2897 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2898 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2900
2901 btst &0x7,EXC_SR(%a6) # is trace on?
2902 bne.b iea_fmovm_trace # yes
2903
2904 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905
2906 unlk %a6 # unravel the frame
2907
2908 bra.l _fpsp_done # exit to os
2909
2910#
2911# The control reg instruction that took an "Unimplemented Effective Address"
2912# exception was being traced. The "Current PC" for the trace frame is the
2913# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914# After fixing the stack frame, jump to _real_trace().
2915#
2916# UNIMP EA FRAME TRACE FRAME
2917# ***************** *****************
2918# * 0x0 * 0x0f0 * * Current *
2919# ***************** * PC *
2920# * Current * *****************
2921# * PC * * 0x2 * 0x024 *
2922# ***************** *****************
2923# * SR * * Next *
2924# ***************** * PC *
2925# *****************
2926# * SR *
2927# *****************
2928# this ain't a pretty solution, but it works:
2929# -restore a6 (not with unlk)
2930# -shift stack frame down over where old a6 used to be
2931# -add LOCAL_SIZE to stack pointer
# iea_fmovm_trace: build the trace frame in place without unlk. All
# accesses are %sp-relative with LOCAL_SIZE added, i.e. relative to the
# slot that held the old a6. The order matters: SR and the stacked PC are
# moved before their original locations are overwritten.
2932iea_fmovm_trace:
2933 mov.l (%a6),%a6 # restore frame pointer
2934 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR to the top of the new frame
2935 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # stacked PC becomes "Current PC"
2936 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # EXC_EXTWPTR becomes "Next PC"
2937 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938 add.l &LOCAL_SIZE,%sp # clear stack frame
2939
2940 bra.l _real_trace
2941
2942#########################################################################
2943# The FPU is disabled and so we should really have taken the "Line
2944# F Emulator" exception. So, here we create an 8-word stack frame
2945# from our 4-word stack frame. This means we must calculate the length
2946# the faulting instruction to get the "next PC". This is trivial for
2947# immediate operands but requires some extra work for fmovm dynamic
2948# which can use most addressing modes.
# iea_disabled: compute the byte length of the faulting instruction, then
# expand the 4-word exception frame into an 8-word frame (voff word
# 0x402c) carrying both the Next PC and the Current PC, and exit through
# _real_fpu_disabled. The length is parked in EXC_VOFF so that it
# survives the register restore and unlk.
2949iea_disabled:
2950 mov.l (%sp)+,%d0 # restore d0
2951
2952 link %a6,&-LOCAL_SIZE # init stack frame
2953
2954 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2955
2956# PC of instruction that took the exception is the PC in the frame
2957 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2959 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2960 bsr.l _imem_read_long # fetch the instruction words
2961 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2962
2963 tst.w %d0 # is instr fmovm?
2964 bmi.b iea_dis_fmovm # yes
2965# instruction is using an extended precision immediate operand. Therefore,
2966# the total instruction length is 16 bytes.
2967iea_dis_immed:
2968 mov.l &0x10,%d0 # 16 bytes of instruction
2969 bra.b iea_dis_cont
2970iea_dis_fmovm:
2971 btst &0xe,%d0 # is instr fmovm ctrl
2972 bne.b iea_dis_fmovm_data # no
2973# the instruction is a fmovm.l with 2 or 3 registers.
2974 bfextu %d0{&19:&3},%d1 # 3-bit field at offset 19 of %d0
2975 mov.l &0xc,%d0 # 12 bytes of instruction
2976 cmpi.b %d1,&0x7 # move all regs?
2977 bne.b iea_dis_cont
2978 addq.l &0x4,%d0 # all three regs: 16 bytes
2979 bra.b iea_dis_cont
2980# the instruction is an fmovm.x dynamic which can use many addressing
2981# modes and thus can have several different total instruction lengths.
2982# call fmovm_calc_ea which will go through the ea calc process and,
2983# as a by-product, will tell us how long the instruction is.
2984iea_dis_fmovm_data:
2985 clr.l %d0
2986 bsr.l fmovm_calc_ea
2987 mov.l EXC_EXTWPTR(%a6),%d0
2988 sub.l EXC_PC(%a6),%d0 # length = EXC_EXTWPTR - stacked PC
2989iea_dis_cont:
2990 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2991
2992 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2993
2994 unlk %a6
2995
2996# here, we actually create the 8-word frame from the 4-word frame,
2997# with the "next PC" as additional info.
2998# the <ea> field is left as undefined.
2999 subq.l &0x8,%sp # make room for new stack
3000 mov.l %d0,-(%sp) # save d0
3001 mov.w 0xc(%sp),0x4(%sp) # move SR
3002 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3003 clr.l %d0
3004 mov.w 0x12(%sp),%d0 # fetch the length stored in the voff slot
3005 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3006 add.l %d0,0x6(%sp) # make Next PC
3007 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3008 mov.l (%sp)+,%d0 # restore d0
3009
3010 bra.l _real_fpu_disabled
3011
3012##########
3013
# iea_iacc: build an 8-word access error frame (voff word 0x4008) from
# the 4-word frame, using the stacked PC as the <ea> and a fixed fslw,
# then exit through _real_access.
# NOTE(review): the FP state restore is skipped when PCR bit 1 is set,
# presumably because that bit means the FPU is disabled -- confirm.
3014iea_iacc:
3015 movc %pcr,%d0
3016 btst &0x1,%d0
3017 bne.b iea_iacc_cont
3018 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3020iea_iacc_cont:
3021 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3022
3023 unlk %a6
3024
3025 subq.w &0x8,%sp # make stack frame bigger
3026 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3027 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3028 mov.w &0x4008,0x6(%sp) # store voff
3029 mov.l 0x2(%sp),0x8(%sp) # store ea
3030 mov.l &0x09428001,0xc(%sp) # store fslw
3031
# shared tail for iea_iacc and iea_dacc: for a supervisor-mode fault,
# set bit 2 of the byte at 0xd(%sp) (inside the fslw) before leaving.
3032iea_acc_done:
3033 btst &0x5,(%sp) # user or supervisor mode?
3034 beq.b iea_acc_done2 # user
3035 bset &0x2,0xd(%sp) # set supervisor TM bit
3036
3037iea_acc_done2:
3038 bra.l _real_access
3039
# iea_dacc: build an 8-word access error frame (voff word 0x4008) in
# place, 8 bytes below the original frame, without using unlk. %sp is
# first reset to the bottom of the local frame so everything can be
# addressed as LOCAL_SIZE+offset(%sp).
# NOTE(review): %a0 and %d0 are stored into the <ea> and upper fslw slots,
# so presumably the caller passes the faulting address in %a0 and the
# fslw upper word in %d0 -- confirm against the callers.
3040iea_dacc:
3041 lea -LOCAL_SIZE(%a6),%sp
3042
3043 movc %pcr,%d1
3044 btst &0x1,%d1
3045 bne.b iea_dacc_cont
3046 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3047 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048iea_dacc_cont:
3049 mov.l (%a6),%a6 # restore frame pointer
3050
3051 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # move SR,hi(PC) down 8
3052 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # move lo(PC) down 8
3053 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff
3054 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea
3055 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store fslw upper word
3056 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store fslw lower word
3057
3058 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059 add.w &LOCAL_SIZE-0x4,%sp # %sp = start of the new frame
3060
3061 bra.b iea_acc_done
3062
3063#########################################################################
3064# XDEF **************************************************************** #
3065# _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3066# #
3067# This handler should be the first code executed upon taking the #
3068# FP Operand Error exception in an operating system. #
3069# #
3070# XREF **************************************************************** #
3071# _imem_read_long() - read instruction longword #
3072# fix_skewed_ops() - adjust src operand in fsave frame #
3073# _real_operr() - "callout" to operating system operr handler #
3074# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3075# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3076# facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3077# #
3078# INPUT *************************************************************** #
3079# - The system stack contains the FP Operr exception frame #
3080# - The fsave frame contains the source operand #
3081# #
3082# OUTPUT ************************************************************** #
3083# No access error: #
3084# - The system stack is unchanged #
3085# - The fsave frame contains the adjusted src op for opclass 0,2 #
3086# #
3087# ALGORITHM *********************************************************** #
3088# In a system where the FP Operr exception is enabled, the goal #
3089# is to get to the handler specified at _real_operr(). But, on the 060, #
3090# for opclass zero and two instruction taking this exception, the #
3091# input operand in the fsave frame may be incorrect for some cases #
3092# and needs to be corrected. This handler calls fix_skewed_ops() to #
3093# do just this and then exits through _real_operr(). #
3094# For opclass 3 instructions, the 060 doesn't store the default #
3095# operr result out to memory or data register file as it should. #
3096# This code must emulate the move out before finally exiting through #
3097# _real_operr(). The move out, if to memory, is performed using #
3098# _mem_write() "callout" routines that may return a failing result. #
3099# In this special case, the handler must exit through facc_out() #
3100# which creates an access error stack frame from the current operr #
3101# stack frame. #
3102# #
3103#########################################################################
3104
# _fpsp_operr: entry point. Save state, fetch the faulting instruction
# (its address is in FPIAR), then either emulate the move out for an
# "fmove out" (foperr_out) or fix the source operand in the fsave frame,
# and exit through _real_operr with the frame frestored.
3105 global _fpsp_operr
3106_fpsp_operr:
3107
3108 link.w %a6,&-LOCAL_SIZE # init stack frame
3109
3110 fsave FP_SRC(%a6) # grab the "busy" frame
3111
3112 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3113 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3115
3116# the FPIAR holds the "current PC" of the faulting instruction
3117 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118
3119 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3120 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3121 bsr.l _imem_read_long # fetch the instruction words
3122 mov.l %d0,EXC_OPWORD(%a6) # store the fetched longword
3123
3124##############################################################################
3125
3126 btst &13,%d0 # is instr an fmove out?
3127 bne.b foperr_out # fmove out
3128
3129
3130# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131# this would be the case for opclass two operations with a source infinity or
3132# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133# cause an operr so we don't need to check for them here.
3134 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3135 bsr.l fix_skewed_ops # fix src op
3136
# common exit: restore everything saved at entry and hand off to the OS.
3137foperr_exit:
3138 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3139 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3141
3142 frestore FP_SRC(%a6)
3143
3144 unlk %a6
3145 bra.l _real_operr
3146
3147########################################################################
3148
3149#
3150# the hardware does not save the default result to memory on enabled
3151# operand error exceptions. we do this here before passing control to
3152# the user operand error handler.
3153#
3154# byte, word, and long destination format operations can pass
3155# through here. we simply need to test the sign of the src
3156# operand and save the appropriate minimum or maximum integer value
3157# to the effective address as pointed to by the stacked effective address.
3158#
3159# although packed opclass three operations can take operand error
3160# exceptions, they won't pass through here since they are caught
3161# first by the unsupported data format exception handler. that handler
3162# sends them directly to _real_operr() if necessary.
3163#
# foperr_out: compute the default result into L_SCR1, then dispatch on
# the destination format field of the instruction.
#   exponent 0x7fff with a non-zero mantissa (ignoring the top mantissa
#   bit): L_SCR1 = upper mantissa longword of the source.
#   anything else: L_SCR1 = 0x7fffffff, or 0x80000000 if the source is
#   negative.
3164foperr_out:
3165
3166 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3167 andi.w &0x7fff,%d1 # strip the sign
3168 cmpi.w %d1,&0x7fff # max exponent?
3169 bne.b foperr_out_not_qnan # no
3170# the operand is either an infinity or a QNAN.
3171 tst.l FP_SRC_LO(%a6) # any low mantissa bits set?
3172 bne.b foperr_out_qnan # yes; it's a NAN
3173 mov.l FP_SRC_HI(%a6),%d1
3174 andi.l &0x7fffffff,%d1 # ignore the top mantissa bit
3175 beq.b foperr_out_not_qnan # mantissa is zero; it's an infinity
3176foperr_out_qnan:
3177 mov.l FP_SRC_HI(%a6),L_SCR1(%a6) # result = upper mantissa longword
3178 bra.b foperr_out_jmp
3179
3180foperr_out_not_qnan:
3181 mov.l &0x7fffffff,%d1 # largest positive longword
3182 tst.b FP_SRC_EX(%a6) # is the source negative?
3183 bpl.b foperr_out_not_qnan2 # no
3184 addq.l &0x1,%d1 # yes; 0x7fffffff + 1 = 0x80000000
3185foperr_out_not_qnan2:
3186 mov.l %d1,L_SCR1(%a6)
3187
# dispatch through tbl_operr; %d1 carries the <ea> mode,reg byte.
3188foperr_out_jmp:
3189 bfextu %d0{&19:&3},%d0 # extract dst format field
3190 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3191 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3192 jmp (tbl_operr.b,%pc,%a0)
3193
# word offsets from tbl_operr, indexed by the dst format field. entries
# marked "shouldn't happen" hold offset zero, i.e. they point back at the
# table itself.
3194tbl_operr:
3195 short foperr_out_l - tbl_operr # long word integer
3196 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3197 short tbl_operr - tbl_operr # ext prec shouldn't happen
3198 short foperr_exit - tbl_operr # packed won't enter here
3199 short foperr_out_w - tbl_operr # word integer
3200 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3201 short foperr_out_b - tbl_operr # byte integer
3202 short tbl_operr - tbl_operr # packed won't enter here
3203
# foperr_out_{b,w,l}: store the default result, read from L_SCR1 at the
# destination size, either to data register Dn (<ea> byte in %d1 <= 7) or
# to memory at the stacked <ea>. A failed memory write (non-zero %d1
# after the _dmem_write_* call) exits through facc_out_{b,w,l}.
3204foperr_out_b:
3205 mov.b L_SCR1(%a6),%d0 # load default result (byte at L_SCR1)
3206 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3207 ble.b foperr_out_b_save_dn # yes
3208 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3209 bsr.l _dmem_write_byte # write the default result
3210
3211 tst.l %d1 # did dstore fail?
3212 bne.l facc_out_b # yes
3213
3214 bra.w foperr_exit
3215foperr_out_b_save_dn:
3216 andi.w &0x0007,%d1 # keep the register number only
3217 bsr.l store_dreg_b # store result to regfile
3218 bra.w foperr_exit
3219
3220foperr_out_w:
3221 mov.w L_SCR1(%a6),%d0 # load default result (word at L_SCR1)
3222 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3223 ble.b foperr_out_w_save_dn # yes
3224 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3225 bsr.l _dmem_write_word # write the default result
3226
3227 tst.l %d1 # did dstore fail?
3228 bne.l facc_out_w # yes
3229
3230 bra.w foperr_exit
3231foperr_out_w_save_dn:
3232 andi.w &0x0007,%d1 # keep the register number only
3233 bsr.l store_dreg_w # store result to regfile
3234 bra.w foperr_exit
3235
3236foperr_out_l:
3237 mov.l L_SCR1(%a6),%d0 # load default result (longword at L_SCR1)
3238 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3239 ble.b foperr_out_l_save_dn # yes
3240 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3241 bsr.l _dmem_write_long # write the default result
3242
3243 tst.l %d1 # did dstore fail?
3244 bne.l facc_out_l # yes
3245
3246 bra.w foperr_exit
3247foperr_out_l_save_dn:
3248 andi.w &0x0007,%d1 # keep the register number only
3249 bsr.l store_dreg_l # store result to regfile
3250 bra.w foperr_exit
3251
3252#########################################################################
3253# XDEF **************************************************************** #
3254# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3255# #
3256# This handler should be the first code executed upon taking the #
3257# FP Signalling NAN exception in an operating system. #
3258# #
3259# XREF **************************************************************** #
3260# _imem_read_long() - read instruction longword #
3261# fix_skewed_ops() - adjust src operand in fsave frame #
3262# _real_snan() - "callout" to operating system SNAN handler #
3263# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3264# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3265# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3266# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3267# #
3268# INPUT *************************************************************** #
3269# - The system stack contains the FP SNAN exception frame #
3270# - The fsave frame contains the source operand #
3271# #
3272# OUTPUT ************************************************************** #
3273# No access error: #
3274# - The system stack is unchanged #
3275# - The fsave frame contains the adjusted src op for opclass 0,2 #
3276# #
3277# ALGORITHM *********************************************************** #
3278# In a system where the FP SNAN exception is enabled, the goal #
3279# is to get to the handler specified at _real_snan(). But, on the 060, #
3280# for opclass zero and two instructions taking this exception, the #
3281# input operand in the fsave frame may be incorrect for some cases #
3282# and needs to be corrected. This handler calls fix_skewed_ops() to #
3283# do just this and then exits through _real_snan(). #
3284# For opclass 3 instructions, the 060 doesn't store the default #
3285# SNAN result out to memory or data register file as it should. #
3286# This code must emulate the move out before finally exiting through #
3287# _real_snan(). The move out, if to memory, is performed using #
3288# _mem_write() "callout" routines that may return a failing result. #
3289# In this special case, the handler must exit through facc_out() #
3290# which creates an access error stack frame from the current SNAN #
3291# stack frame. #
3292# For the case of an extended precision opclass 3 instruction, #
3293# if the effective addressing mode was -() or ()+, then the address #
3294# register must get updated by calling _calc_ea_fout(). If the <ea> #
3295# was -(a7) from supervisor mode, then the exception frame currently #
3296# on the system stack must be carefully moved "down" to make room #
3297# for the operand being moved. #
3298# #
3299#########################################################################
3300
# _fpsp_snan: entry point. Save state, fetch the faulting instruction
# (its address is in FPIAR), then either emulate the move out for an
# "fmove out" (fsnan_out) or fix the source operand in the fsave frame,
# and exit through _real_snan with the frame frestored.
3301 global _fpsp_snan
3302_fpsp_snan:
3303
3304 link.w %a6,&-LOCAL_SIZE # init stack frame
3305
3306 fsave FP_SRC(%a6) # grab the "busy" frame
3307
3308 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3309 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3311
3312# the FPIAR holds the "current PC" of the faulting instruction
3313 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314
3315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3316 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3317 bsr.l _imem_read_long # fetch the instruction words
3318 mov.l %d0,EXC_OPWORD(%a6) # store the fetched longword
3319
3320##############################################################################
3321
3322 btst &13,%d0 # is instr an fmove out?
3323 bne.w fsnan_out # fmove out
3324
3325
3326# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327# this would be the case for opclass two operations with a source infinity or
3328# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329# fixed here.
3330 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3331 bsr.l fix_skewed_ops # fix src op
3332
# common exit: restore everything saved at entry and hand off to the OS.
3333fsnan_exit:
3334 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3335 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3337
3338 frestore FP_SRC(%a6)
3339
3340 unlk %a6
3341 bra.l _real_snan
3342
3343########################################################################
3344
3345#
3346# the hardware does not save the default result to memory on enabled
3347# snan exceptions. we do this here before passing control to
3348# the user snan handler.
3349#
3350# all destination formats can pass through here. since packed format
3351# operations already were handled by fpsp_unsupp(), we need to do
3352# nothing else for them here. for byte, word, and long, we store the
3353# upper byte, word, or longword of the SNAN mantissa with the SNAN bit
3354# set. single, double, and extended results are rebuilt from the SNAN.
3355# memory results go to the effective address (recomputed for extended).
3356#
# fsnan_out: dispatch on the destination format field of the instruction.
# %d1 carries the <ea> mode,reg byte into the per-format handlers.
3357fsnan_out:
3358
3359 bfextu %d0{&19:&3},%d0 # extract dst format field
3360 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3361 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3362 jmp (tbl_snan.b,%pc,%a0)
3363
# word offsets from tbl_snan, indexed by the dst format field. the two
# packed entries hold offset zero, i.e. they point back at the table.
3364tbl_snan:
3365 short fsnan_out_l - tbl_snan # long word integer
3366 short fsnan_out_s - tbl_snan # sgl prec
3367 short fsnan_out_x - tbl_snan # ext prec
3368 short tbl_snan - tbl_snan # packed needs no help
3369 short fsnan_out_w - tbl_snan # word integer
3370 short fsnan_out_d - tbl_snan # dbl prec
3371 short fsnan_out_b - tbl_snan # byte integer
3372 short tbl_snan - tbl_snan # packed needs no help
3373
# fsnan_out_{b,w,l}: the result is the upper byte/word/longword of the
# SNAN mantissa with its second-highest bit set. It is stored to data
# register Dn (<ea> byte in %d1 <= 7) or to memory at the stacked <ea>.
# A failed memory write (non-zero %d1 after the _dmem_write_* call) exits
# through facc_out_{b,w,l}.
3374fsnan_out_b:
3375 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3376 bset &6,%d0 # set SNAN bit
3377 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3378 ble.b fsnan_out_b_dn # yes
3379 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3380 bsr.l _dmem_write_byte # write the default result
3381
3382 tst.l %d1 # did dstore fail?
3383 bne.l facc_out_b # yes
3384
3385 bra.w fsnan_exit
3386fsnan_out_b_dn:
3387 andi.w &0x0007,%d1 # keep the register number only
3388 bsr.l store_dreg_b # store result to regfile
3389 bra.w fsnan_exit
3390
3391fsnan_out_w:
3392 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3393 bset &14,%d0 # set SNAN bit
3394 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3395 ble.b fsnan_out_w_dn # yes
3396 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3397 bsr.l _dmem_write_word # write the default result
3398
3399 tst.l %d1 # did dstore fail?
3400 bne.l facc_out_w # yes
3401
3402 bra.w fsnan_exit
3403fsnan_out_w_dn:
3404 andi.w &0x0007,%d1 # keep the register number only
3405 bsr.l store_dreg_w # store result to regfile
3406 bra.w fsnan_exit
3407
3408fsnan_out_l:
3409 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3410 bset &30,%d0 # set SNAN bit
3411 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3412 ble.b fsnan_out_l_dn # yes
3413 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3414 bsr.l _dmem_write_long # write the default result
3415
3416 tst.l %d1 # did dstore fail?
3417 bne.l facc_out_l # yes
3418
3419 bra.w fsnan_exit
3420fsnan_out_l_dn:
3421 andi.w &0x0007,%d1 # keep the register number only
3422 bsr.l store_dreg_l # store result to regfile
3423 bra.w fsnan_exit
3424
# fsnan_out_s: build a single precision NAN from the source: sign bit,
# 0x7fc00000 (exponent plus SNAN bit), OR'ed with the upper mantissa
# longword shifted right by 8. It is written as a longword to memory or
# to a data register.
3425fsnan_out_s:
3426 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3427 ble.b fsnan_out_d_dn # yes
3428 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3429 andi.l &0x80000000,%d0 # keep sign
3430 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3431 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3432 lsr.l &0x8,%d1 # shift mantissa for sgl
3433 or.l %d1,%d0 # create sgl SNAN
3434 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3435 bsr.l _dmem_write_long # write the default result
3436
3437 tst.l %d1 # did dstore fail?
3438 bne.l facc_out_l # yes
3439
3440 bra.w fsnan_exit
# despite the "_d_" in its name, this is the data register path of the
# single precision case above. %d1 (the <ea> byte) is saved on the stack
# while it is borrowed as scratch for the mantissa.
3441fsnan_out_d_dn:
3442 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3443 andi.l &0x80000000,%d0 # keep sign
3444 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3445 mov.l %d1,-(%sp) # save <ea> mode,reg
3446 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3447 lsr.l &0x8,%d1 # shift mantissa for sgl
3448 or.l %d1,%d0 # create sgl SNAN
3449 mov.l (%sp)+,%d1 # restore <ea> mode,reg
3450 andi.w &0x0007,%d1 # keep the register number only
3451 bsr.l store_dreg_l # store result to regfile
3452 bra.w fsnan_exit
3453
# fsnan_out_d: build a double precision NAN in FP_SCR0 and write its 8
# bytes to the stacked <ea>.
#   hi longword = sign | 0x7ff80000 | (upper mantissa >> 11)
#   lo longword = (low 11 bits of upper mantissa, rotated to the top)
#                 | (lower mantissa >> 11)
3454fsnan_out_d:
3455 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3456 andi.l &0x80000000,%d0 # keep sign
3457 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3458 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3459 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3460 mov.l &11,%d0 # load shift amt
3461 lsr.l %d0,%d1 # drop the 11 bits that won't fit
3462 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3463 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3464 andi.l &0x000007ff,%d1 # keep the 11 bits dropped above
3465 ror.l %d0,%d1 # rotate them into the top 11 bits
3466 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3467 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3468 lsr.l %d0,%d1 # make room for those 11 bits
3469 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3470 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3471 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3472 movq.l &0x8,%d0 # pass: size of 8 bytes
3473 bsr.l _dmem_write # write the default result
3474
3475 tst.l %d1 # did dstore fail?
3476 bne.l facc_out_d # yes
3477
3478 bra.w fsnan_exit
3479
3480# for extended precision, if the addressing mode is pre-decrement or
3481# post-increment, then the address register did not get updated.
3482# in addition, for pre-decrement, the stacked <ea> is incorrect.
# fsnan_out_x: build the extended precision result in FP_SCR0 (source
# exponent word, zeroed pad word, upper mantissa with bit 30 set, lower
# mantissa unchanged). _calc_ea_fout then recomputes the <ea> and updates
# An. The saved a6 is copied to EXC_A6 before the call and copied back
# afterwards, and likewise a7 through EXC_A7 in user mode.
3483fsnan_out_x:
3484 clr.b SPCOND_FLG(%a6) # clear special case flag
3485
3486 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent
3487 clr.w 2+FP_SCR0(%a6) # zero the word after the exponent
3488 mov.l FP_SRC_HI(%a6),%d0
3489 bset &30,%d0 # set SNAN bit
3490 mov.l %d0,FP_SCR0_HI(%a6)
3491 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492
3493 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3494 bne.b fsnan_out_x_s # yes
3495
3496 mov.l %usp,%a0 # fetch user stack pointer
3497 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3498 mov.l (%a6),EXC_A6(%a6) # expose saved a6 to calc_ea()
3499
3500 bsr.l _calc_ea_fout # find the correct ea,update An
3501 mov.l %a0,%a1 # pass: dst addr for _dmem_write
3502 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3503
3504 mov.l EXC_A7(%a6),%a0
3505 mov.l %a0,%usp # restore user stack pointer
3506 mov.l EXC_A6(%a6),(%a6) # write back possibly updated a6
3507
# write the 12-byte result from FP_SCR0 to the address in %a1.
3508fsnan_out_x_save:
3509 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3510 movq.l &0xc,%d0 # pass: size of extended
3511 bsr.l _dmem_write # write the default result
3512
3513 tst.l %d1 # did dstore fail?
3514 bne.l facc_out_x # yes
3515
3516 bra.w fsnan_exit
3517
# supervisor mode: as above, but without the usp handling, and with a
# check for the -(a7) special case.
3518fsnan_out_x_s:
3519 mov.l (%a6),EXC_A6(%a6) # expose saved a6 to calc_ea()
3520
3521 bsr.l _calc_ea_fout # find the correct ea,update An
3522 mov.l %a0,%a1 # pass: dst addr for _dmem_write
3523 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3524
3525 mov.l EXC_A6(%a6),(%a6) # write back possibly updated a6
3526
3527 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528 bne.b fsnan_out_x_save # no
3529
3530# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the destination overlaps the exception frame, so three longwords of the
# frame are copied 0xc bytes lower and the 12-byte result is then written
# over their original locations.
3531 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3532 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3534
3535 frestore FP_SRC(%a6)
3536
3537 mov.l EXC_A6(%a6),%a6 # restore frame pointer
3538
3539 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542
3543 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546
3547 add.l &LOCAL_SIZE-0x8,%sp # drop the locals; %sp = moved frame
3548
3549 bra.l _real_snan
3550
3551#########################################################################
3552# XDEF **************************************************************** #
3553# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3554# #
3555# This handler should be the first code executed upon taking the #
3556# FP Inexact exception in an operating system. #
3557# #
3558# XREF **************************************************************** #
3559# _imem_read_long() - read instruction longword #
3560# fix_skewed_ops() - adjust src operand in fsave frame #
3561# set_tag_x() - determine optype of src/dst operands #
3562# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3563# unnorm_fix() - change UNNORM operands to NORM or ZERO #
3564# load_fpn2() - load dst operand from FP regfile #
3565# smovcr() - emulate an "fmovcr" instruction #
3566# fout() - emulate an opclass 3 instruction #
3567# tbl_unsupp - add of table of emulation routines for opclass 0,2 #
3568# _real_inex() - "callout" to operating system inexact handler #
3569# #
3570# INPUT *************************************************************** #
3571# - The system stack contains the FP Inexact exception frame #
3572# - The fsave frame contains the source operand #
3573# #
3574# OUTPUT ************************************************************** #
3575# - The system stack is unchanged #
3576# - The fsave frame contains the adjusted src op for opclass 0,2 #
3577# #
3578# ALGORITHM *********************************************************** #
3579# In a system where the FP Inexact exception is enabled, the goal #
3580# is to get to the handler specified at _real_inex(). But, on the 060, #
3581# for opclass zero and two instruction taking this exception, the #
3582# hardware doesn't store the correct result to the destination FP #
3583# register as did the '040 and '881/2. This handler must emulate the #
3584# instruction in order to get this value and then store it to the #
3585# correct register before calling _real_inex(). #
3586# For opclass 3 instructions, the 060 doesn't store the default #
3587# inexact result out to memory or data register file as it should. #
3588# This code must emulate the move out by calling fout() before finally #
3589# exiting through _real_inex(). #
3590# #
3591#########################################################################
3592
3593 global _fpsp_inex
3594_fpsp_inex:
# Flow: build a local frame, save the FPU state and scratch registers,
# fetch the faulting instruction, then either
#   - emulate an opclass 0/2 operation and store the result (finex_save), or
#   - perform the opclass 3 move out through fout() (finex_out),
# and finally restore everything and leave through _real_inex().
3595
3596 link.w %a6,&-LOCAL_SIZE # init stack frame
3597
3598 fsave FP_SRC(%a6) # grab the "busy" frame
3599
3600 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3601 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3603
3604# the FPIAR holds the "current PC" of the faulting instruction
3605 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606
3607 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3608 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3609 bsr.l _imem_read_long # fetch the instruction words
3610 mov.l %d0,EXC_OPWORD(%a6) # save opword + extension word
3611
3612##############################################################################
3613
# d0 still holds the longword just fetched; bit 13 separates the move-out
# case from the operations that are emulated below.
3614 btst &13,%d0 # is instr an fmove out?
3615 bne.w finex_out # fmove out
3616
3617
3618# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619# longword integer directly into the upper longword of the mantissa along
3620# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621 bfextu %d0{&19:&3},%d0 # fetch instr size
3622 bne.b finex_cont # instr size is not long
3623 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3624 bne.b finex_cont # no
3625 fmov.l &0x0,%fpcr
3626 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3627 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
# NOTE(review): presumably this rewrites the word at offset 2 of the fsave
# frame that the fmov.x store above overwrote -- confirm against the 68060
# fsave frame layout.
3628 mov.w &0xe001,0x2+FP_SRC(%a6)
3629
3630finex_cont:
3631 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3632 bsr.l fix_skewed_ops # fix src op
3633
3634# Here, we zero the ccode and exception byte field since we're going to
3635# emulate the whole instruction. Notice, though, that we don't kill the
3636# INEX1 bit. This is because a packed op has long since been converted
3637# to extended before arriving here. Therefore, we need to retain the
3638# INEX1 bit from when the operand was first converted.
# (the mask below keeps bits 23-16, bit 8 and bits 7-0 of the saved FPSR)
3639 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640
3641 fmov.l &0x0,%fpcr # zero current control regs
3642 fmov.l &0x0,%fpsr
3643
3644 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645 cmpi.b %d1,&0x17 # is op an fmovecr?
3646 beq.w finex_fmovcr # yes
3647
3648 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3649 bsr.l set_tag_x # tag the operand type
3650 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3651
3652# bits four and five of the fp extension word separate the monadic and dyadic
3653# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654# will never take this exception, but fsincos will.
3655 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3656 beq.b finex_extract # monadic
3657
3658 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3659 bne.b finex_extract # yes
3660
# dyadic operation: the destination register is also an input operand, so
# load and tag it before dispatching.
3661 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662 bsr.l load_fpn2 # load dst into FP_DST
3663
3664 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3665 bsr.l set_tag_x # tag the operand type
3666 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3667 bne.b finex_op2_done # no
3668 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3669finex_op2_done:
3670 mov.b %d0,DTAG(%a6) # save dst optype tag
3671
# set up the arguments for the emulation routine:
#   d0 = rnd prec/mode byte, a0 = ptr to src operand, a1 = ptr to dst operand
3672finex_extract:
3673 clr.l %d0
3674 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3675
3676 mov.b 1+EXC_CMDREG(%a6),%d1
3677 andi.w &0x007f,%d1 # extract extension
3678
3679 lea FP_SRC(%a6),%a0
3680 lea FP_DST(%a6),%a1
3681
# tbl_unsupp entries are offsets relative to tbl_unsupp itself: the first
# instruction loads the offset for this extension, the second calls
# tbl_unsupp + offset.
3682 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683 jsr (tbl_unsupp.l,%pc,%d1.l*1)
3684
3685# the operation has been emulated. the result is in fp0.
3686finex_save:
3687 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst reg number
3688 bsr.l store_fpreg
3689
# common exit: undo the saves made at entry (in reverse order) and hand the
# exception to the operating system's inexact handler.
3690finex_exit:
3691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3694
3695 frestore FP_SRC(%a6)
3696
3697 unlk %a6
3698 bra.l _real_inex
3699
# fmovecr: no source operand to tag; pass the rounding info in d0 and the
# low 7 bits of the command word in d1, then rejoin the store path.
3700finex_fmovcr:
3701 clr.l %d0
3702 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3703 mov.b 1+EXC_CMDREG(%a6),%d1
3704 andi.l &0x0000007f,%d1 # pass rom offset
3705 bsr.l smovcr
3706 bra.b finex_save
3707
3708########################################################################
3709
3710#
3711# the hardware does not save the default result to memory on enabled
3712# inexact exceptions. we do this here before passing control to
3713# the user inexact handler.
3714#
3715# byte, word, and long destination format operations can pass
3716# through here. so can double and single precision.
3717# although packed opclass three operations can take inexact
3718# exceptions, they won't pass through here since they are caught
3719# first by the unsupported data format exception handler. that handler
3720# sends them directly to _real_inex() if necessary.
3721#
3722finex_out:
3723
3724 mov.b &NORM,STAG(%a6) # src is a NORM
3725
3726 clr.l %d0
3727 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3728
3729 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3730
3731 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3732
3733 bsr.l fout # store the default result
3734
3735 bra.b finex_exit
3736
3737#########################################################################
3738# XDEF **************************************************************** #
3739# _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3740# #
3741# This handler should be the first code executed upon taking #
3742# the FP DZ exception in an operating system. #
3743# #
3744# XREF **************************************************************** #
3745# _imem_read_long() - read instruction longword from memory #
3746# fix_skewed_ops() - adjust fsave operand #
3747# _real_dz() - "callout" exit point from FP DZ handler #
3748# #
3749# INPUT *************************************************************** #
3750# - The system stack contains the FP DZ exception stack. #
3751# - The fsave frame contains the source operand. #
3752# #
3753# OUTPUT ************************************************************** #
3754# - The system stack contains the FP DZ exception stack. #
3755# - The fsave frame contains the adjusted source operand. #
3756# #
3757# ALGORITHM *********************************************************** #
3758# In a system where the DZ exception is enabled, the goal is to #
3759# get to the handler specified at _real_dz(). But, on the 060, when the #
3760# exception is taken, the input operand in the fsave state frame may #
3761# be incorrect for some cases and need to be adjusted. So, this package #
3762# adjusts the operand using fix_skewed_ops() and then branches to #
3763# _real_dz(). #
3764# #
3765#########################################################################
3766
3767 global _fpsp_dz
3768_fpsp_dz:
# Flow: build a local frame, save FPU state and scratch registers, fetch the
# faulting instruction, let fix_skewed_ops() adjust the source operand in the
# fsave frame, then restore everything and leave through _real_dz().
3769
3770 link.w %a6,&-LOCAL_SIZE # init stack frame
3771
3772 fsave FP_SRC(%a6) # grab the "busy" frame
3773
3774 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3775 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3777
3778# the FPIAR holds the "current PC" of the faulting instruction
3779 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780
3781 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3782 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3783 bsr.l _imem_read_long # fetch the instruction words
3784 mov.l %d0,EXC_OPWORD(%a6) # save opword + extension word
3785
3786##############################################################################
3787
3788
3789# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790# this would be the case for opclass two operations with a source zero
3791# in the sgl or dbl format.
3792 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3793 bsr.l fix_skewed_ops # fix src op
3794
# undo the saves made at entry; frestore reloads the (possibly adjusted)
# fsave frame before control passes to the OS handler.
3795fdz_exit:
3796 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3797 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3799
3800 frestore FP_SRC(%a6)
3801
3802 unlk %a6
3803 bra.l _real_dz
3804
3805#########################################################################
3806# XDEF **************************************************************** #
3807# _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3808# exception when the "reduced" version of the #
3809# FPSP is implemented that does not emulate #
3810# FP unimplemented instructions. #
3811# #
3812# This handler should be the first code executed upon taking a #
3813# "Line F Emulator" exception in an operating system integrating #
3814# the reduced version of 060FPSP. #
3815# #
3816# XREF **************************************************************** #
3817# _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3818# _real_fline() - Handle all other cases (treated equally) #
3819# #
3820# INPUT *************************************************************** #
3821# - The system stack contains a "Line F Emulator" exception #
3822# stack frame. #
3823# #
3824# OUTPUT ************************************************************** #
3825# - The system stack is unchanged. #
3826# #
3827# ALGORITHM *********************************************************** #
3828# When a "Line F Emulator" exception occurs in a system where #
3829# "FPU Unimplemented" instructions will not be emulated, the exception #
3830# can occur because the FPU is disabled or the instruction is to be #
3831# classified as "Line F". This module determines which case exists and #
3832# calls the appropriate "callout". #
3833# #
3834#########################################################################
3835
3836 global _fpsp_fline
3837_fpsp_fline:
# No frame is built and no register is touched here: the routine only
# inspects the stacked exception frame and branches to one of two callouts.
3838
3839# check to see if the FPU is disabled. if so, jump to the OS entry
3840# point for that condition.
# NOTE(review): 0x6(%sp) is presumably the format/vector-offset word of the
# exception frame and 0x402c the value stacked for "FPU disabled" -- confirm
# against the 68060 exception stack frame documentation.
3841 cmpi.w 0x6(%sp),&0x402c
3842 beq.l _real_fpu_disabled # yes; FPU disabled callout
3843
3844 bra.l _real_fline # no; treat as ordinary Line F
3845
3846#########################################################################
3847# XDEF **************************************************************** #
3848# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3849# #
3850# XREF **************************************************************** #
3851# inc_areg() - increment an address register #
3852# dec_areg() - decrement an address register #
3853# #
3854# INPUT *************************************************************** #
3855# d0 = number of bytes to adjust <ea> by #
3856# #
3857# OUTPUT ************************************************************** #
3858# None #
3859# #
3860# ALGORITHM *********************************************************** #
3861# "Dummy" CALCulate Effective Address: #
3862# The stacked <ea> for FP unimplemented instructions and opclass #
3863# two packed instructions is correct with the exception of... #
3864# #
3865# 1) -(An) : The register is not updated regardless of size. #
3866# Also, for extended precision and packed, the #
3867# stacked <ea> value is 8 bytes too big #
3868# 2) (An)+ : The register is not updated. #
3869# 3) #<data> : The upper longword of the immediate operand is #
3870# stacked. b,w,l and s sizes are completely stacked. #
3871# d,x, and p are not. #
3872# #
3873#########################################################################
3874
3875 global _dcalc_ea
3876_dcalc_ea:
# On entry d0 = number of bytes to adjust by. It is parked in a0 while the
# <ea> mode/reg fields of the opword are decoded into d0/d1.
# Every path returns an effective address in a0.
3877 mov.l %d0, %a0 # move # bytes to %a0
3878
3879 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
3880 mov.l %d0, %d1 # make a copy
3881
3882 andi.w &0x38, %d0 # extract mode field
3883 andi.l &0x7, %d1 # extract reg field
3884
3885 cmpi.b %d0,&0x18 # is mode (An)+ ?
3886 beq.b dcea_pi # yes
3887
3888 cmpi.b %d0,&0x20 # is mode -(An) ?
3889 beq.b dcea_pd # yes
3890
3891 or.w %d1,%d0 # concat mode,reg
3892 cmpi.b %d0,&0x3c # is mode #<data>?
3893
3894 beq.b dcea_imm # yes
3895
# any other addressing mode: the stacked <ea> is used unchanged.
3896 mov.l EXC_EA(%a6),%a0 # return <ea>
3897 rts
3898
3899# need to set immediate data flag here since we'll need to do
3900# an imem_read to fetch this later.
3901dcea_imm:
3902 mov.b &immed_flg,SPCOND_FLG(%a6)
3903 lea ([USER_FPIAR,%a6],0x4),%a0 # return <ea> = saved FPIAR + 4
3904 rts
3905
3906# here, the <ea> is stacked correctly. however, we must update the
3907# address register...
3908dcea_pi:
3909 mov.l %a0,%d0 # pass amt to inc by
3910 bsr.l inc_areg # inc addr register
3911
3912 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3913 rts
3914
3915# the <ea> is stacked correctly for all but extended and packed which
3916# the <ea>s are 8 bytes too large.
3917# it would make no sense to have a pre-decrement to a7 in supervisor
3918# mode so we don't even worry about this tricky case here : )
3919dcea_pd:
3920 mov.l %a0,%d0 # pass amt to dec by
3921 bsr.l dec_areg # dec addr register
3922
3923 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3924
# NOTE(review): this compare reads d0 after the call above, so it relies on
# dec_areg() leaving the byte count in d0 intact -- confirm in dec_areg().
3925 cmpi.b %d0,&0xc # is opsize ext or packed?
3926 beq.b dcea_pd2 # yes
3927 rts
3928dcea_pd2:
3929 sub.l &0x8,%a0 # correct <ea>
3930 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3931 rts
3932
3933#########################################################################
3934# XDEF **************************************************************** #
3935# _calc_ea_fout(): calculate correct stacked <ea> for extended #
3936# and packed data opclass 3 operations. #
3937# #
3938# XREF **************************************************************** #
3939# None #
3940# #
3941# INPUT *************************************************************** #
3942# None #
3943# #
3944# OUTPUT ************************************************************** #
3945# a0 = return correct effective address #
3946# #
3947# ALGORITHM *********************************************************** #
3948# For opclass 3 extended and packed data operations, the <ea> #
3949# stacked for the exception is incorrect for -(an) and (an)+ addressing #
3950# modes. Also, while we're at it, the index register itself must get #
3951# updated. #
3952# So, for -(an), we must subtract 8 off of the stacked <ea> value #
3953# and return that value as the correct <ea> and store that value in An. #
3954# For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3955# #
3956#########################################################################
3957
3958# This calc_ea is currently used to retrieve the correct <ea>
3959# for fmove outs of type extended and packed.
3960 global _calc_ea_fout
3961_calc_ea_fout:
# Decodes the <ea> mode/reg fields of the opword. For (An)+ and -(An) it
# dispatches through a per-register jump table to update An; for every other
# mode it returns the stacked <ea> unchanged. Result is returned in a0.
3962 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
3963 mov.l %d0,%d1 # make a copy
3964
3965 andi.w &0x38,%d0 # extract mode field
3966 andi.l &0x7,%d1 # extract reg field
3967
3968 cmpi.b %d0,&0x18 # is mode (An)+ ?
3969 beq.b ceaf_pi # yes
3970
3971 cmpi.b %d0,&0x20 # is mode -(An) ?
3972 beq.w ceaf_pd # yes
3973
3974 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3975 rts
3976
3977# (An)+ : extended and packed fmove out
3978# : stacked <ea> is correct
3979# : "An" not updated
3980ceaf_pi:
3981 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # fetch offset for register n
3982 mov.l EXC_EA(%a6),%a0 # a0 = <ea> to return
3983 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1) # go to ceaf_pi<n>
3984
# table of 16-bit offsets, relative to the table, one per address register.
3985 swbeg &0x8
3986tbl_ceaf_pi:
3987 short ceaf_pi0 - tbl_ceaf_pi
3988 short ceaf_pi1 - tbl_ceaf_pi
3989 short ceaf_pi2 - tbl_ceaf_pi
3990 short ceaf_pi3 - tbl_ceaf_pi
3991 short ceaf_pi4 - tbl_ceaf_pi
3992 short ceaf_pi5 - tbl_ceaf_pi
3993 short ceaf_pi6 - tbl_ceaf_pi
3994 short ceaf_pi7 - tbl_ceaf_pi
3995
# a0 and a1 are updated in the EXC_DREGS save area (offsets 0x8 and 0xc),
# a2-a5 are updated directly in the registers, and a6/a7 are updated in
# their own save slots EXC_A6/EXC_A7. Each case adds 12 (0xc) bytes.
3996ceaf_pi0:
3997 addi.l &0xc,EXC_DREGS+0x8(%a6)
3998 rts
3999ceaf_pi1:
4000 addi.l &0xc,EXC_DREGS+0xc(%a6)
4001 rts
4002ceaf_pi2:
4003 add.l &0xc,%a2
4004 rts
4005ceaf_pi3:
4006 add.l &0xc,%a3
4007 rts
4008ceaf_pi4:
4009 add.l &0xc,%a4
4010 rts
4011ceaf_pi5:
4012 add.l &0xc,%a5
4013 rts
4014ceaf_pi6:
4015 addi.l &0xc,EXC_A6(%a6)
4016 rts
4017ceaf_pi7:
4018 mov.b &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
4019 addi.l &0xc,EXC_A7(%a6)
4020 rts
4021
4022# -(An) : extended and packed fmove out
4023# : stacked <ea> = actual <ea> + 8
4024# : "An" not updated
4025ceaf_pd:
4026 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # fetch offset for register n
4027 mov.l EXC_EA(%a6),%a0
4028 sub.l &0x8,%a0 # a0 = corrected <ea>
4029 sub.l &0x8,EXC_EA(%a6) # correct the stacked copy too
4030 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1) # go to ceaf_pd<n>
4031
# table of 16-bit offsets, relative to the table, one per address register.
4032 swbeg &0x8
4033tbl_ceaf_pd:
4034 short ceaf_pd0 - tbl_ceaf_pd
4035 short ceaf_pd1 - tbl_ceaf_pd
4036 short ceaf_pd2 - tbl_ceaf_pd
4037 short ceaf_pd3 - tbl_ceaf_pd
4038 short ceaf_pd4 - tbl_ceaf_pd
4039 short ceaf_pd5 - tbl_ceaf_pd
4040 short ceaf_pd6 - tbl_ceaf_pd
4041 short ceaf_pd7 - tbl_ceaf_pd
4042
# each case stores the corrected <ea> (a0) as the new value of An, using the
# same save-area / live-register split as the (An)+ cases.
4043ceaf_pd0:
4044 mov.l %a0,EXC_DREGS+0x8(%a6)
4045 rts
4046ceaf_pd1:
4047 mov.l %a0,EXC_DREGS+0xc(%a6)
4048 rts
4049ceaf_pd2:
4050 mov.l %a0,%a2
4051 rts
4052ceaf_pd3:
4053 mov.l %a0,%a3
4054 rts
4055ceaf_pd4:
4056 mov.l %a0,%a4
4057 rts
4058ceaf_pd5:
4059 mov.l %a0,%a5
4060 rts
4061ceaf_pd6:
4062 mov.l %a0,EXC_A6(%a6)
4063 rts
4064ceaf_pd7:
4065 mov.l %a0,EXC_A7(%a6)
4066 mov.b &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
4067 rts
4068
4069#
4070# This table holds the offsets of the emulation routines for each individual
4071# math operation relative to the address of this table. Included are
4072# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073# this table is for the version of the 060FPSP without transcendentals.
4074# The location within the table is determined by the extension bits of the
4075# operation longword.
4076#
4077
# 109 longword entries, indexed by extension value 0x00-0x6c. Each entry is
# the routine's offset from tbl_unsupp; an entry of "tbl_unsupp - tbl_unsupp"
# is a zero offset, i.e. no emulation routine is provided for that index.
4078 swbeg &109
4079tbl_unsupp:
4080 long fin - tbl_unsupp # 00: fmove
4081 long fint - tbl_unsupp # 01: fint
4082 long tbl_unsupp - tbl_unsupp # 02: fsinh
4083 long fintrz - tbl_unsupp # 03: fintrz
4084 long fsqrt - tbl_unsupp # 04: fsqrt
4085 long tbl_unsupp - tbl_unsupp # 05: (no routine)
4086 long tbl_unsupp - tbl_unsupp # 06: flognp1
4087 long tbl_unsupp - tbl_unsupp # 07: (no routine)
4088 long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4089 long tbl_unsupp - tbl_unsupp # 09: ftanh
4090 long tbl_unsupp - tbl_unsupp # 0a: fatan
4091 long tbl_unsupp - tbl_unsupp # 0b: (no routine)
4092 long tbl_unsupp - tbl_unsupp # 0c: fasin
4093 long tbl_unsupp - tbl_unsupp # 0d: fatanh
4094 long tbl_unsupp - tbl_unsupp # 0e: fsin
4095 long tbl_unsupp - tbl_unsupp # 0f: ftan
4096 long tbl_unsupp - tbl_unsupp # 10: fetox
4097 long tbl_unsupp - tbl_unsupp # 11: ftwotox
4098 long tbl_unsupp - tbl_unsupp # 12: ftentox
4099 long tbl_unsupp - tbl_unsupp # 13: (no routine)
4100 long tbl_unsupp - tbl_unsupp # 14: flogn
4101 long tbl_unsupp - tbl_unsupp # 15: flog10
4102 long tbl_unsupp - tbl_unsupp # 16: flog2
4103 long tbl_unsupp - tbl_unsupp # 17: (no routine)
4104 long fabs - tbl_unsupp # 18: fabs
4105 long tbl_unsupp - tbl_unsupp # 19: fcosh
4106 long fneg - tbl_unsupp # 1a: fneg
4107 long tbl_unsupp - tbl_unsupp # 1b: (no routine)
4108 long tbl_unsupp - tbl_unsupp # 1c: facos
4109 long tbl_unsupp - tbl_unsupp # 1d: fcos
4110 long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4111 long tbl_unsupp - tbl_unsupp # 1f: fgetman
4112 long fdiv - tbl_unsupp # 20: fdiv
4113 long tbl_unsupp - tbl_unsupp # 21: fmod
4114 long fadd - tbl_unsupp # 22: fadd
4115 long fmul - tbl_unsupp # 23: fmul
4116 long fsgldiv - tbl_unsupp # 24: fsgldiv
4117 long tbl_unsupp - tbl_unsupp # 25: frem
4118 long tbl_unsupp - tbl_unsupp # 26: fscale
4119 long fsglmul - tbl_unsupp # 27: fsglmul
4120 long fsub - tbl_unsupp # 28: fsub
4121 long tbl_unsupp - tbl_unsupp # 29: (no routine)
4122 long tbl_unsupp - tbl_unsupp # 2a: (no routine)
4123 long tbl_unsupp - tbl_unsupp # 2b: (no routine)
4124 long tbl_unsupp - tbl_unsupp # 2c: (no routine)
4125 long tbl_unsupp - tbl_unsupp # 2d: (no routine)
4126 long tbl_unsupp - tbl_unsupp # 2e: (no routine)
4127 long tbl_unsupp - tbl_unsupp # 2f: (no routine)
4128 long tbl_unsupp - tbl_unsupp # 30: fsincos
4129 long tbl_unsupp - tbl_unsupp # 31: fsincos
4130 long tbl_unsupp - tbl_unsupp # 32: fsincos
4131 long tbl_unsupp - tbl_unsupp # 33: fsincos
4132 long tbl_unsupp - tbl_unsupp # 34: fsincos
4133 long tbl_unsupp - tbl_unsupp # 35: fsincos
4134 long tbl_unsupp - tbl_unsupp # 36: fsincos
4135 long tbl_unsupp - tbl_unsupp # 37: fsincos
4136 long fcmp - tbl_unsupp # 38: fcmp
4137 long tbl_unsupp - tbl_unsupp # 39: (no routine)
4138 long ftst - tbl_unsupp # 3a: ftst
4139 long tbl_unsupp - tbl_unsupp # 3b: (no routine)
4140 long tbl_unsupp - tbl_unsupp # 3c: (no routine)
4141 long tbl_unsupp - tbl_unsupp # 3d: (no routine)
4142 long tbl_unsupp - tbl_unsupp # 3e: (no routine)
4143 long tbl_unsupp - tbl_unsupp # 3f: (no routine)
4144 long fsin - tbl_unsupp # 40: fsmove
4145 long fssqrt - tbl_unsupp # 41: fssqrt
4146 long tbl_unsupp - tbl_unsupp # 42: (no routine)
4147 long tbl_unsupp - tbl_unsupp # 43: (no routine)
4148 long fdin - tbl_unsupp # 44: fdmove
4149 long fdsqrt - tbl_unsupp # 45: fdsqrt
4150 long tbl_unsupp - tbl_unsupp # 46: (no routine)
4151 long tbl_unsupp - tbl_unsupp # 47: (no routine)
4152 long tbl_unsupp - tbl_unsupp # 48: (no routine)
4153 long tbl_unsupp - tbl_unsupp # 49: (no routine)
4154 long tbl_unsupp - tbl_unsupp # 4a: (no routine)
4155 long tbl_unsupp - tbl_unsupp # 4b: (no routine)
4156 long tbl_unsupp - tbl_unsupp # 4c: (no routine)
4157 long tbl_unsupp - tbl_unsupp # 4d: (no routine)
4158 long tbl_unsupp - tbl_unsupp # 4e: (no routine)
4159 long tbl_unsupp - tbl_unsupp # 4f: (no routine)
4160 long tbl_unsupp - tbl_unsupp # 50: (no routine)
4161 long tbl_unsupp - tbl_unsupp # 51: (no routine)
4162 long tbl_unsupp - tbl_unsupp # 52: (no routine)
4163 long tbl_unsupp - tbl_unsupp # 53: (no routine)
4164 long tbl_unsupp - tbl_unsupp # 54: (no routine)
4165 long tbl_unsupp - tbl_unsupp # 55: (no routine)
4166 long tbl_unsupp - tbl_unsupp # 56: (no routine)
4167 long tbl_unsupp - tbl_unsupp # 57: (no routine)
4168 long fsabs - tbl_unsupp # 58: fsabs
4169 long tbl_unsupp - tbl_unsupp # 59: (no routine)
4170 long fsneg - tbl_unsupp # 5a: fsneg
4171 long tbl_unsupp - tbl_unsupp # 5b: (no routine)
4172 long fdabs - tbl_unsupp # 5c: fdabs
4173 long tbl_unsupp - tbl_unsupp # 5d: (no routine)
4174 long fdneg - tbl_unsupp # 5e: fdneg
4175 long tbl_unsupp - tbl_unsupp # 5f: (no routine)
4176 long fsdiv - tbl_unsupp # 60: fsdiv
4177 long tbl_unsupp - tbl_unsupp # 61: (no routine)
4178 long fsadd - tbl_unsupp # 62: fsadd
4179 long fsmul - tbl_unsupp # 63: fsmul
4180 long fddiv - tbl_unsupp # 64: fddiv
4181 long tbl_unsupp - tbl_unsupp # 65: (no routine)
4182 long fdadd - tbl_unsupp # 66: fdadd
4183 long fdmul - tbl_unsupp # 67: fdmul
4184 long fssub - tbl_unsupp # 68: fssub
4185 long tbl_unsupp - tbl_unsupp # 69: (no routine)
4186 long tbl_unsupp - tbl_unsupp # 6a: (no routine)
4187 long tbl_unsupp - tbl_unsupp # 6b: (no routine)
4188 long fdsub - tbl_unsupp # 6c: fdsub
4189
4190#################################################
4191# Add this here so non-fp modules can compile.
4192# (smovcr is called from fpsp_inex.)
4193 global smovcr
4194smovcr:
# Placeholder only: this stub branches to itself, so a call that reaches it
# never returns. It exists to satisfy the reference from fpsp_inex.
4195 bra.b smovcr
4196
4197#########################################################################
4198# XDEF **************************************************************** #
4199# fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4200# #
4201# XREF **************************************************************** #
4202# fetch_dreg() - fetch data register #
4203# {i,d,}mem_read() - fetch data from memory #
4204# _mem_write() - write data to memory #
4205# iea_iacc() - instruction memory access error occurred #
4206# iea_dacc() - data memory access error occurred #
4207# restore() - restore An index regs if access error occurred #
4208# #
4209# INPUT *************************************************************** #
4210# None #
4211# #
4212# OUTPUT ************************************************************** #
4213# If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4214# d0 = size of dump #
4215# d1 = Dn #
4216# Else if instruction access error, #
4217# d0 = FSLW #
4218# Else if data access error, #
4219# d0 = FSLW #
4220# a0 = address of fault #
4221# Else #
4222# none. #
4223# #
4224# ALGORITHM *********************************************************** #
4225# The effective address must be calculated since this is entered #
4226# from an "Unimplemented Effective Address" exception handler. So, we #
4227# have our own fcalc_ea() routine here. If an access error is flagged #
4228# by a _{i,d,}mem_read() call, we must exit through the special #
4229# handler. #
4230# The data register is determined and its value loaded to get the #
4231# string of FP registers affected. This value is used as an index into #
4232# a lookup table such that we can determine the number of bytes #
4233# involved. #
4234# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4235# to read in all FP values. Again, _mem_read() may fail and require a #
4236# special exit. #
4237# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4238# to write all FP values. _mem_write() may also fail. #
4239# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4240# then we return the size of the dump and the string to the caller #
4241# so that the move can occur outside of this routine. This special #
4242# case is required so that moves to the system stack are handled #
4243# correctly. #
4244# #
4245# DYNAMIC: #
4246# fmovm.x dn, <ea> #
4247# fmovm.x <ea>, dn #
4248# #
4249# <WORD 1> <WORD2> #
4250# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4251# #
4252# & = (0): predecrement addressing mode #
4253# (1): postincrement or control addressing mode #
4254# @ = (0): move listed regs from memory to the FPU #
4255# (1): move listed regs from the FPU to memory #
4256# $$$ : index of data register holding reg select mask #
4257# #
4258# NOTES: #
4259# If the data register holds a zero, then the #
4260# instruction is a nop. #
4261# #
4262#########################################################################
4263
4264 global fmovm_dynamic
4265fmovm_dynamic:
# Flow: fetch the register-select bit string from Dn, look up the byte count
# in tbl_fmovm_size, compute the <ea>, then copy the selected FP registers
# through a temporary buffer on the system stack using _dmem_write (move out)
# or _dmem_read (move in).
4266
4267# extract the data register in which the bit string resides...
4268 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4269 andi.w &0x70,%d1 # extract reg bits
4270 lsr.b &0x4,%d1 # shift into lo bits
4271
4272# fetch the bit string into d0...
4273 bsr.l fetch_dreg # fetch reg string
4274
4275 andi.l &0x000000ff,%d0 # keep only lo byte
4276
# strg and size are kept on the stack across the fmovm_calc_ea call.
4277 mov.l %d0,-(%sp) # save strg
4278 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes to move
4279 mov.l %d0,-(%sp) # save size
4280 bsr.l fmovm_calc_ea # calculate <ea>
4281 mov.l (%sp)+,%d0 # restore size
4282 mov.l (%sp)+,%d1 # restore strg
4283
4284# if the bit string is a zero, then the operation is a no-op
4285# but, make sure that we've calculated ea and advanced the opword pointer
# (the Z flag tested here was set by the mov.l that restored strg into d1)
4286 beq.w fmovm_data_done
4287
4288# separate move ins from move outs...
4289 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4290 beq.w fmovm_data_in # it's a move in
4291
4292#############
4293# MOVE OUT: #
4294#############
4295fmovm_data_out:
4296 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4297 bne.w fmovm_out_ctrl # control
4298
4299############################
4300fmovm_out_predec:
4301# for predecrement mode, the bit string is the opposite of both control
4302# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303# here, we convert it to be just like the others...
4304 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4305
4306 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4307 beq.b fmovm_out_ctrl # user
4308
4309fmovm_out_predec_s:
4310 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311 bne.b fmovm_out_ctrl
4312
4313# the operation was unfortunately an: fmovm.x dn,-(sp)
4314# called from supervisor mode.
4315# we're also passing "size" and "strg" back to the calling routine
# (d0 = size, d1 = converted bit string at this point)
4316 rts
4317
4318############################
# Build the register image in a buffer carved out of the system stack, then
# copy it to the destination with _dmem_write.
#   a1 = destination <ea>, a0 = fill pointer into the buffer, d0 = size
# Bit 7 of d1 is tested for each register in turn; lsl.b moves the next
# register's bit into bit 7.
4319fmovm_out_ctrl:
4320 mov.l %a0,%a1 # move <ea> to a1
4321
4322 sub.l %d0,%sp # subtract size of dump
4323 lea (%sp),%a0
4324
4325 tst.b %d1 # should FP0 be moved?
4326 bpl.b fmovm_out_ctrl_fp1 # no
4327
# FP0 and FP1 are copied from their save slots in the local frame
# (EXC_FP0/EXC_FP1); FP2-FP7 are stored straight from the FPU below.
4328 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4329 mov.l 0x4+EXC_FP0(%a6),(%a0)+
4330 mov.l 0x8+EXC_FP0(%a6),(%a0)+
4331
4332fmovm_out_ctrl_fp1:
4333 lsl.b &0x1,%d1 # should FP1 be moved?
4334 bpl.b fmovm_out_ctrl_fp2 # no
4335
4336 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4337 mov.l 0x4+EXC_FP1(%a6),(%a0)+
4338 mov.l 0x8+EXC_FP1(%a6),(%a0)+
4339
4340fmovm_out_ctrl_fp2:
4341 lsl.b &0x1,%d1 # should FP2 be moved?
4342 bpl.b fmovm_out_ctrl_fp3 # no
4343
4344 fmovm.x &0x20,(%a0) # yes
4345 add.l &0xc,%a0 # advance past the 12-byte image
4346
4347fmovm_out_ctrl_fp3:
4348 lsl.b &0x1,%d1 # should FP3 be moved?
4349 bpl.b fmovm_out_ctrl_fp4 # no
4350
4351 fmovm.x &0x10,(%a0) # yes
4352 add.l &0xc,%a0
4353
4354fmovm_out_ctrl_fp4:
4355 lsl.b &0x1,%d1 # should FP4 be moved?
4356 bpl.b fmovm_out_ctrl_fp5 # no
4357
4358 fmovm.x &0x08,(%a0) # yes
4359 add.l &0xc,%a0
4360
4361fmovm_out_ctrl_fp5:
4362 lsl.b &0x1,%d1 # should FP5 be moved?
4363 bpl.b fmovm_out_ctrl_fp6 # no
4364
4365 fmovm.x &0x04,(%a0) # yes
4366 add.l &0xc,%a0
4367
4368fmovm_out_ctrl_fp6:
4369 lsl.b &0x1,%d1 # should FP6 be moved?
4370 bpl.b fmovm_out_ctrl_fp7 # no
4371
4372 fmovm.x &0x02,(%a0) # yes
4373 add.l &0xc,%a0
4374
4375fmovm_out_ctrl_fp7:
4376 lsl.b &0x1,%d1 # should FP7 be moved?
4377 bpl.b fmovm_out_ctrl_done # no
4378
4379 fmovm.x &0x01,(%a0) # yes
4380 add.l &0xc,%a0
4381
4382fmovm_out_ctrl_done:
4383 mov.l %a1,L_SCR1(%a6) # remember destination <ea>
4384
4385 lea (%sp),%a0 # pass: supervisor src
4386 mov.l %d0,-(%sp) # save size
4387 bsr.l _dmem_write # copy data to user mem
4388
4389 mov.l (%sp)+,%d0
4390 add.l %d0,%sp # clear fpreg data from stack
4391
4392 tst.l %d1 # did dstore err?
4393 bne.w fmovm_out_err # yes
4394
4395 rts
4396
4397############
4398# MOVE IN: #
4399############
# Read the register image from the source <ea> into a buffer carved out of
# the system stack, then distribute it to the selected registers.
4400fmovm_data_in:
4401 mov.l %a0,L_SCR1(%a6) # remember source <ea>
4402
4403 sub.l %d0,%sp # make room for fpregs
4404 lea (%sp),%a1
4405
4406 mov.l %d1,-(%sp) # save bit string for later
4407 mov.l %d0,-(%sp) # save # of bytes
4408
4409 bsr.l _dmem_read # copy data from user mem
4410
4411 mov.l (%sp)+,%d0 # retrieve # of bytes
4412
# note: the error branch is taken before the saved bit string is popped.
4413 tst.l %d1 # did dfetch fail?
4414 bne.w fmovm_in_err # yes
4415
4416 mov.l (%sp)+,%d1 # load bit string
4417
4418 lea (%sp),%a0 # addr of stack
4419
4420 tst.b %d1 # should FP0 be moved?
4421 bpl.b fmovm_data_in_fp1 # no
4422
# FP0 and FP1 are written to their save slots in the local frame
# (EXC_FP0/EXC_FP1); FP2-FP7 are loaded straight into the FPU below.
4423 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4424 mov.l (%a0)+,0x4+EXC_FP0(%a6)
4425 mov.l (%a0)+,0x8+EXC_FP0(%a6)
4426
4427fmovm_data_in_fp1:
4428 lsl.b &0x1,%d1 # should FP1 be moved?
4429 bpl.b fmovm_data_in_fp2 # no
4430
4431 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4432 mov.l (%a0)+,0x4+EXC_FP1(%a6)
4433 mov.l (%a0)+,0x8+EXC_FP1(%a6)
4434
4435fmovm_data_in_fp2:
4436 lsl.b &0x1,%d1 # should FP2 be moved?
4437 bpl.b fmovm_data_in_fp3 # no
4438
4439 fmovm.x (%a0)+,&0x20 # yes
4440
4441fmovm_data_in_fp3:
4442 lsl.b &0x1,%d1 # should FP3 be moved?
4443 bpl.b fmovm_data_in_fp4 # no
4444
4445 fmovm.x (%a0)+,&0x10 # yes
4446
4447fmovm_data_in_fp4:
4448 lsl.b &0x1,%d1 # should FP4 be moved?
4449 bpl.b fmovm_data_in_fp5 # no
4450
4451 fmovm.x (%a0)+,&0x08 # yes
4452
4453fmovm_data_in_fp5:
4454 lsl.b &0x1,%d1 # should FP5 be moved?
4455 bpl.b fmovm_data_in_fp6 # no
4456
4457 fmovm.x (%a0)+,&0x04 # yes
4458
4459fmovm_data_in_fp6:
4460 lsl.b &0x1,%d1 # should FP6 be moved?
4461 bpl.b fmovm_data_in_fp7 # no
4462
4463 fmovm.x (%a0)+,&0x02 # yes
4464
4465fmovm_data_in_fp7:
4466 lsl.b &0x1,%d1 # should FP7 be moved?
4467 bpl.b fmovm_data_in_done # no
4468
4469 fmovm.x (%a0)+,&0x01 # yes
4470
4471fmovm_data_in_done:
4472 add.l %d0,%sp # remove fpregs from stack
4473 rts
4474
4475#####################################
4476
# reached when the bit string is zero: nothing to move.
4477fmovm_data_done:
4478 rts
4479
4480##############################################################################
4481
4482#
4483# table indexed by the operation's bit string that gives the number
4484# of bytes that will be moved.
4485#
4486# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487#
# 256 one-byte entries (32 rows of 8), indexed by the 8-bit register-select
# bit string. The trailing comment on each row gives the index range it covers.
4488tbl_fmovm_size:
4489 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24 # 0x00-0x07
4490 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 # 0x08-0x0f
4491 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 # 0x10-0x17
4492 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x18-0x1f
4493 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 # 0x20-0x27
4494 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x28-0x2f
4495 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x30-0x37
4496 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0x38-0x3f
4497 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 # 0x40-0x47
4498 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x48-0x4f
4499 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x50-0x57
4500 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0x58-0x5f
4501 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x60-0x67
4502 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0x68-0x6f
4503 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0x70-0x77
4504 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 # 0x78-0x7f
4505 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 # 0x80-0x87
4506 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x88-0x8f
4507 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0x90-0x97
4508 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0x98-0x9f
4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0xa0-0xa7
4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0xa8-0xaf
4511 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0xb0-0xb7
4512 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 # 0xb8-0xbf
4513 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c # 0xc0-0xc7
4514 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0xc8-0xcf
4515 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0xd0-0xd7
4516 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 # 0xd8-0xdf
4517 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 # 0xe0-0xe7
4518 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 # 0xe8-0xef
4519 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 # 0xf0-0xf7
4520 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60 # 0xf8-0xff
4521
4522#
4523# tbl_fmovm_convert: 256-entry byte table to convert a pre-decrement bit
4524# string into a post-increment or control bit string.
# Each entry is the 8-bit index with its bit order reversed
# (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
4525# ex: 0x00 ==> 0x00
4526# 0x01 ==> 0x80
4527# 0x02 ==> 0x40
4528# .
4529# .
4530# 0xfd ==> 0xbf
4531# 0xfe ==> 0x7f
4532# 0xff ==> 0xff
# Each row below holds 8 consecutive entries; the index range is noted per row.
4533#
4534tbl_fmovm_convert:
4535 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0 # 0x00-0x07
4536 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0 # 0x08-0x0f
4537 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8 # 0x10-0x17
4538 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8 # 0x18-0x1f
4539 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4 # 0x20-0x27
4540 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4 # 0x28-0x2f
4541 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec # 0x30-0x37
4542 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc # 0x38-0x3f
4543 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2 # 0x40-0x47
4544 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2 # 0x48-0x4f
4545 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea # 0x50-0x57
4546 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa # 0x58-0x5f
4547 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6 # 0x60-0x67
4548 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6 # 0x68-0x6f
4549 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee # 0x70-0x77
4550 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe # 0x78-0x7f
4551 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1 # 0x80-0x87
4552 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1 # 0x88-0x8f
4553 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9 # 0x90-0x97
4554 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9 # 0x98-0x9f
4555 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5 # 0xa0-0xa7
4556 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5 # 0xa8-0xaf
4557 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed # 0xb0-0xb7
4558 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd # 0xb8-0xbf
4559 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3 # 0xc0-0xc7
4560 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3 # 0xc8-0xcf
4561 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb # 0xd0-0xd7
4562 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb # 0xd8-0xdf
4563 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7 # 0xe0-0xe7
4564 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7 # 0xe8-0xef
4565 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef # 0xf0-0xf7
4566 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff # 0xf8-0xff
4567
4568 global fmovm_calc_ea
4569###############################################
4570# _fmovm_calc_ea: calculate effective address #
4571###############################################
#
# Dispatches on the low 6 bits ({MODE,REG}) of EXC_OPWORD(%a6) through
# tbl_fea_mode to the handler that computes the <ea>.
#
# INPUT:  d0 = # of bytes to be moved; it is copied to a0, where the
#              (An)+ and -(An) handlers use it as the adjust amount.
# At dispatch: a0 = byte count, d1 = REG field (0-7).
# OUTPUT: a0 = <ea> (every handler visible in this file returns it in a0).
#
4572fmovm_calc_ea:
4573 mov.l %d0,%a0 # move # bytes to a0
4574
4575# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576# easily changed if they were inputs passed in registers.
4577 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4578 mov.w %d0,%d1 # make a copy
4579
4580 andi.w &0x3f,%d0 # extract 6-bit {mode,reg} field = table index
4581 andi.l &0x7,%d1 # extract reg field
4582
4583# jump to the corresponding function for each {MODE,REG} pair.
# each table entry is a 16-bit offset relative to tbl_fea_mode.
4584 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586
4587 swbeg &64 # 64 word-sized table entries follow
4588tbl_fea_mode:
4589 short tbl_fea_mode - tbl_fea_mode # index 0x00-0x07: offset 0, no handler (presumably Dn direct; confirm)
4590 short tbl_fea_mode - tbl_fea_mode
4591 short tbl_fea_mode - tbl_fea_mode
4592 short tbl_fea_mode - tbl_fea_mode
4593 short tbl_fea_mode - tbl_fea_mode
4594 short tbl_fea_mode - tbl_fea_mode
4595 short tbl_fea_mode - tbl_fea_mode
4596 short tbl_fea_mode - tbl_fea_mode
4597
4598 short tbl_fea_mode - tbl_fea_mode # index 0x08-0x0f: offset 0, no handler (presumably An direct; confirm)
4599 short tbl_fea_mode - tbl_fea_mode
4600 short tbl_fea_mode - tbl_fea_mode
4601 short tbl_fea_mode - tbl_fea_mode
4602 short tbl_fea_mode - tbl_fea_mode
4603 short tbl_fea_mode - tbl_fea_mode
4604 short tbl_fea_mode - tbl_fea_mode
4605 short tbl_fea_mode - tbl_fea_mode
4606
4607 short faddr_ind_a0 - tbl_fea_mode # index 0x10-0x17: (An)
4608 short faddr_ind_a1 - tbl_fea_mode
4609 short faddr_ind_a2 - tbl_fea_mode
4610 short faddr_ind_a3 - tbl_fea_mode
4611 short faddr_ind_a4 - tbl_fea_mode
4612 short faddr_ind_a5 - tbl_fea_mode
4613 short faddr_ind_a6 - tbl_fea_mode
4614 short faddr_ind_a7 - tbl_fea_mode
4615
4616 short faddr_ind_p_a0 - tbl_fea_mode # index 0x18-0x1f: (An)+
4617 short faddr_ind_p_a1 - tbl_fea_mode
4618 short faddr_ind_p_a2 - tbl_fea_mode
4619 short faddr_ind_p_a3 - tbl_fea_mode
4620 short faddr_ind_p_a4 - tbl_fea_mode
4621 short faddr_ind_p_a5 - tbl_fea_mode
4622 short faddr_ind_p_a6 - tbl_fea_mode
4623 short faddr_ind_p_a7 - tbl_fea_mode
4624
4625 short faddr_ind_m_a0 - tbl_fea_mode # index 0x20-0x27: -(An)
4626 short faddr_ind_m_a1 - tbl_fea_mode
4627 short faddr_ind_m_a2 - tbl_fea_mode
4628 short faddr_ind_m_a3 - tbl_fea_mode
4629 short faddr_ind_m_a4 - tbl_fea_mode
4630 short faddr_ind_m_a5 - tbl_fea_mode
4631 short faddr_ind_m_a6 - tbl_fea_mode
4632 short faddr_ind_m_a7 - tbl_fea_mode
4633
4634 short faddr_ind_disp_a0 - tbl_fea_mode # index 0x28-0x2f: (d16,An)
4635 short faddr_ind_disp_a1 - tbl_fea_mode
4636 short faddr_ind_disp_a2 - tbl_fea_mode
4637 short faddr_ind_disp_a3 - tbl_fea_mode
4638 short faddr_ind_disp_a4 - tbl_fea_mode
4639 short faddr_ind_disp_a5 - tbl_fea_mode
4640 short faddr_ind_disp_a6 - tbl_fea_mode
4641 short faddr_ind_disp_a7 - tbl_fea_mode
4642
4643 short faddr_ind_ext - tbl_fea_mode # index 0x30-0x37: An indexed / memory indirect (one shared handler; reg is in d1)
4644 short faddr_ind_ext - tbl_fea_mode
4645 short faddr_ind_ext - tbl_fea_mode
4646 short faddr_ind_ext - tbl_fea_mode
4647 short faddr_ind_ext - tbl_fea_mode
4648 short faddr_ind_ext - tbl_fea_mode
4649 short faddr_ind_ext - tbl_fea_mode
4650 short faddr_ind_ext - tbl_fea_mode
4651
4652 short fabs_short - tbl_fea_mode # index 0x38: (XXX).W
4653 short fabs_long - tbl_fea_mode # index 0x39: (XXX).L
4654 short fpc_ind - tbl_fea_mode # index 0x3a: (d16,PC)
4655 short fpc_ind_ext - tbl_fea_mode # index 0x3b: PC indexed / memory indirect
4656 short tbl_fea_mode - tbl_fea_mode # index 0x3c-0x3f: offset 0, no handler
4657 short tbl_fea_mode - tbl_fea_mode
4658 short tbl_fea_mode - tbl_fea_mode
4659 short tbl_fea_mode - tbl_fea_mode
4660
4661###################################
4662# Address register indirect: (An) #
4663###################################
# One handler per address register; each returns a0 = current An.
# a0/a1 are read from the save area at EXC_DREGS+0x8/+0xc(%a6), a6 from
# (%a6), a7 from EXC_A7(%a6); a2-a5 are read directly from the registers.
4664faddr_ind_a0:
4665 mov.l EXC_DREGS+0x8(%a6),%a0 # <ea> = a0 value held in the EXC_DREGS save area
4666 rts
4667
4668faddr_ind_a1:
4669 mov.l EXC_DREGS+0xc(%a6),%a0 # <ea> = a1 value held in the EXC_DREGS save area
4670 rts
4671
4672faddr_ind_a2:
4673 mov.l %a2,%a0 # <ea> = a2 (read directly from the register)
4674 rts
4675
4676faddr_ind_a3:
4677 mov.l %a3,%a0 # <ea> = a3 (read directly from the register)
4678 rts
4679
4680faddr_ind_a4:
4681 mov.l %a4,%a0 # <ea> = a4 (read directly from the register)
4682 rts
4683
4684faddr_ind_a5:
4685 mov.l %a5,%a0 # <ea> = a5 (read directly from the register)
4686 rts
4687
4688faddr_ind_a6:
4689 mov.l (%a6),%a0 # <ea> = a6 value stored at (%a6) (presumably saved there by link; confirm)
4690 rts
4691
4692faddr_ind_a7:
4693 mov.l EXC_A7(%a6),%a0 # <ea> = a7 value held at EXC_A7
4694 rts
4695
4696#####################################################
4697# Address register indirect w/ postincrement: (An)+ #
4698#####################################################
# INPUT:  a0 = # of bytes to add to An (set up by fmovm_calc_ea)
# OUTPUT: a0 = <ea> = An before the increment; An (register or its saved
#         copy) is updated to An + byte count.
# d0 and d1 are used as scratch.
4699faddr_ind_p_a0:
4700 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4701 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4702 add.l %a0,%d1 # Increment by byte count
4703 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
4704 mov.l %d0,%a0 # return pre-increment value as <ea>
4705 rts
4706
4707faddr_ind_p_a1:
4708 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4709 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4710 add.l %a0,%d1 # Increment by byte count
4711 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
4712 mov.l %d0,%a0 # return pre-increment value as <ea>
4713 rts
4714
4715faddr_ind_p_a2:
4716 mov.l %a2,%d0 # Get current a2
4717 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4718 add.l %a0,%d1 # Increment by byte count
4719 mov.l %d1,%a2 # Save incr value directly in a2
4720 mov.l %d0,%a0 # return pre-increment value as <ea>
4721 rts
4722
4723faddr_ind_p_a3:
4724 mov.l %a3,%d0 # Get current a3
4725 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4726 add.l %a0,%d1 # Increment by byte count
4727 mov.l %d1,%a3 # Save incr value directly in a3
4728 mov.l %d0,%a0 # return pre-increment value as <ea>
4729 rts
4730
4731faddr_ind_p_a4:
4732 mov.l %a4,%d0 # Get current a4
4733 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4734 add.l %a0,%d1 # Increment by byte count
4735 mov.l %d1,%a4 # Save incr value directly in a4
4736 mov.l %d0,%a0 # return pre-increment value as <ea>
4737 rts
4738
4739faddr_ind_p_a5:
4740 mov.l %a5,%d0 # Get current a5
4741 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4742 add.l %a0,%d1 # Increment by byte count
4743 mov.l %d1,%a5 # Save incr value directly in a5
4744 mov.l %d0,%a0 # return pre-increment value as <ea>
4745 rts
4746
4747faddr_ind_p_a6:
4748 mov.l (%a6),%d0 # Get current a6 (value stored at (%a6))
4749 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4750 add.l %a0,%d1 # Increment by byte count
4751 mov.l %d1,(%a6) # Save incr value
4752 mov.l %d0,%a0 # return pre-increment value as <ea>
4753 rts
4754
4755faddr_ind_p_a7:
4756 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag: (a7)+ was used
4757
4758 mov.l EXC_A7(%a6),%d0 # Get current a7
4759 mov.l %d0,%d1 # copy so the original value survives as the <ea>
4760 add.l %a0,%d1 # Increment by byte count
4761 mov.l %d1,EXC_A7(%a6) # Save incr value
4762 mov.l %d0,%a0 # return pre-increment value as <ea>
4763 rts
4764
4765####################################################
4766# Address register indirect w/ predecrement: -(An) #
4767####################################################
# INPUT:  a0 = # of bytes to subtract from An (set up by fmovm_calc_ea)
# OUTPUT: a0 = <ea> = An after the decrement; An (register or its saved
#         copy) is updated to the same value.
# d0 is used as scratch.
4768faddr_ind_m_a0:
4769 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4770 sub.l %a0,%d0 # Decrement by byte count
4771 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
4772 mov.l %d0,%a0 # return decremented value as <ea>
4773 rts
4774
4775faddr_ind_m_a1:
4776 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4777 sub.l %a0,%d0 # Decrement by byte count
4778 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
4779 mov.l %d0,%a0 # return decremented value as <ea>
4780 rts
4781
4782faddr_ind_m_a2:
4783 mov.l %a2,%d0 # Get current a2
4784 sub.l %a0,%d0 # Decrement by byte count
4785 mov.l %d0,%a2 # Save decr value directly in a2
4786 mov.l %d0,%a0 # return decremented value as <ea>
4787 rts
4788
4789faddr_ind_m_a3:
4790 mov.l %a3,%d0 # Get current a3
4791 sub.l %a0,%d0 # Decrement by byte count
4792 mov.l %d0,%a3 # Save decr value directly in a3
4793 mov.l %d0,%a0 # return decremented value as <ea>
4794 rts
4795
4796faddr_ind_m_a4:
4797 mov.l %a4,%d0 # Get current a4
4798 sub.l %a0,%d0 # Decrement by byte count
4799 mov.l %d0,%a4 # Save decr value directly in a4
4800 mov.l %d0,%a0 # return decremented value as <ea>
4801 rts
4802
4803faddr_ind_m_a5:
4804 mov.l %a5,%d0 # Get current a5
4805 sub.l %a0,%d0 # Decrement by byte count
4806 mov.l %d0,%a5 # Save decr value directly in a5
4807 mov.l %d0,%a0 # return decremented value as <ea>
4808 rts
4809
4810faddr_ind_m_a6:
4811 mov.l (%a6),%d0 # Get current a6 (value stored at (%a6))
4812 sub.l %a0,%d0 # Decrement by byte count
4813 mov.l %d0,(%a6) # Save decr value
4814 mov.l %d0,%a0 # return decremented value as <ea>
4815 rts
4816
4817faddr_ind_m_a7:
4818 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag: -(a7) was used
4819
4820 mov.l EXC_A7(%a6),%d0 # Get current a7
4821 sub.l %a0,%d0 # Decrement by byte count
4822 mov.l %d0,EXC_A7(%a6) # Save decr value
4823 mov.l %d0,%a0 # return decremented value as <ea>
4824 rts
4825
4826########################################################
4827# Address register indirect w/ displacement: (d16, An) #
4828########################################################
# Each handler fetches the 16-bit displacement from the instruction stream
# at EXC_EXTWPTR(%a6) (advancing that pointer by 2) via _imem_read_word,
# sign-extends it and adds the current An.
# OUTPUT: a0 = <ea> = An + d16.
# If the fetch fails (d1 != 0 on return) control goes to iea_iacc.
# The incoming byte count in a0 is not used by this mode.
4829faddr_ind_disp_a0:
4830 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4831 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4832 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4833
4834 tst.l %d1 # did ifetch fail?
4835 bne.l iea_iacc # yes
4836
4837 mov.w %d0,%a0 # sign extend displacement
4838
4839 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
4840 rts
4841
4842faddr_ind_disp_a1:
4843 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4844 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4845 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4846
4847 tst.l %d1 # did ifetch fail?
4848 bne.l iea_iacc # yes
4849
4850 mov.w %d0,%a0 # sign extend displacement
4851
4852 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
4853 rts
4854
4855faddr_ind_disp_a2:
4856 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4857 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4858 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4859
4860 tst.l %d1 # did ifetch fail?
4861 bne.l iea_iacc # yes
4862
4863 mov.w %d0,%a0 # sign extend displacement
4864
4865 add.l %a2,%a0 # a2 + d16
4866 rts
4867
4868faddr_ind_disp_a3:
4869 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4870 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4871 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4872
4873 tst.l %d1 # did ifetch fail?
4874 bne.l iea_iacc # yes
4875
4876 mov.w %d0,%a0 # sign extend displacement
4877
4878 add.l %a3,%a0 # a3 + d16
4879 rts
4880
4881faddr_ind_disp_a4:
4882 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4883 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4884 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4885
4886 tst.l %d1 # did ifetch fail?
4887 bne.l iea_iacc # yes
4888
4889 mov.w %d0,%a0 # sign extend displacement
4890
4891 add.l %a4,%a0 # a4 + d16
4892 rts
4893
4894faddr_ind_disp_a5:
4895 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4896 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4897 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4898
4899 tst.l %d1 # did ifetch fail?
4900 bne.l iea_iacc # yes
4901
4902 mov.w %d0,%a0 # sign extend displacement
4903
4904 add.l %a5,%a0 # a5 + d16
4905 rts
4906
4907faddr_ind_disp_a6:
4908 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4909 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4910 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4911
4912 tst.l %d1 # did ifetch fail?
4913 bne.l iea_iacc # yes
4914
4915 mov.w %d0,%a0 # sign extend displacement
4916
4917 add.l (%a6),%a0 # a6 + d16 (a6 value is the one stored at (%a6))
4918 rts
4919
4920faddr_ind_disp_a7:
4921 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4922 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4923 bsr.l _imem_read_word # d0 = displacement word, d1 = status
4924
4925 tst.l %d1 # did ifetch fail?
4926 bne.l iea_iacc # yes
4927
4928 mov.w %d0,%a0 # sign extend displacement
4929
4930 add.l EXC_A7(%a6),%a0 # a7 + d16
4931 rts
4932
4933########################################################################
4934# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935# " " " w/ " (base displacement): (bd, An, Xn) #
4936# Memory indirect postindexed: ([bd, An], Xn, od) #
4937# Memory indirect preindexed: ([bd, An, Xn], od) #
4938########################################################################
# INPUT:  d1 = REG field (0-7) of the opword (set up by fmovm_calc_ea)
# OUTPUT: a0 = <ea>
# Fetches the extension word. If its bit 8 is set, the rest of the work is
# done by fcalc_mem_ind (a0 = base An, d0 = extension word). Otherwise the
# brief form is decoded here: <ea> = An + (Xn.{w,l} << scale) + d8.
# An instruction fetch failure exits through iea_iacc.
4939faddr_ind_ext:
4940 addq.l &0x8,%d1 # reg + 8 selects the address register in fetch_dreg's numbering
4941 bsr.l fetch_dreg # fetch base areg
4942 mov.l %d0,-(%sp) # park base An across the ifetch
4943
4944 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4945 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4946 bsr.l _imem_read_word # fetch extword in d0
4947
4948 tst.l %d1 # did ifetch fail?
4949 bne.l iea_iacc # yes (parked base is still on the stack; NOTE(review): presumably iea_iacc resets sp - confirm)
4950
4951 mov.l (%sp)+,%a0 # a0 = base An
4952
4953 btst &0x8,%d0 # extword bit 8 set?
4954 bne.w fcalc_mem_ind # yes; decode it in fcalc_mem_ind
4955
4956 mov.l %d0,L_SCR1(%a6) # hold opword
4957
4958 mov.l %d0,%d1
4959 rol.w &0x4,%d1 # rotate extword bits 15-12 into bits 3-0
4960 andi.w &0xf,%d1 # extract index regno
4961
4962# count on fetch_dreg() not to alter a0...
4963 bsr.l fetch_dreg # fetch index
4964
4965 mov.l %d2,-(%sp) # save d2
4966 mov.l L_SCR1(%a6),%d2 # fetch opword
4967
4968 btst &0xb,%d2 # is it word or long?
4969 bne.b faii8_long # bit 11 set: long index, use as is
4970 ext.l %d0 # sign extend word index
4971faii8_long:
4972 mov.l %d2,%d1
4973 rol.w &0x7,%d1 # rotate extword bits 10-9 into bits 1-0
4974 andi.l &0x3,%d1 # extract scale value
4975
4976 lsl.l %d1,%d0 # shift index by scale
4977
4978 extb.l %d2 # sign extend displacement
4979 add.l %d2,%d0 # index + disp
4980 add.l %d0,%a0 # An + (index + disp)
4981
4982 mov.l (%sp)+,%d2 # restore old d2
4983 rts
4984
4985###########################
4986# Absolute short: (XXX).W #
4987###########################
# Fetches the 16-bit address from the instruction stream at EXC_EXTWPTR(%a6)
# (advancing that pointer by 2) and returns it sign-extended in a0.
# An instruction fetch failure exits through iea_iacc.
4988fabs_short:
4989 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4990 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4991 bsr.l _imem_read_word # fetch short address
4992
4993 tst.l %d1 # did ifetch fail?
4994 bne.l iea_iacc # yes
4995
4996 mov.w %d0,%a0 # return <ea> in a0 (word move to An sign-extends)
4997 rts
4998
4999##########################
5000# Absolute long: (XXX).L #
5001##########################
# Fetches the 32-bit address from the instruction stream at EXC_EXTWPTR(%a6)
# (advancing that pointer by 4) and returns it in a0.
# An instruction fetch failure exits through iea_iacc.
5002fabs_long:
5003 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5004 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5005 bsr.l _imem_read_long # fetch long address
5006
5007 tst.l %d1 # did ifetch fail?
5008 bne.l iea_iacc # yes
5009
5010 mov.l %d0,%a0 # return <ea> in a0
5011 rts
5012
5013#######################################################
5014# Program counter indirect w/ displacement: (d16, PC) #
5015#######################################################
# Fetches the 16-bit displacement from the instruction stream and returns
# a0 = <ea> = (address of the displacement word) + sign-extended d16.
# An instruction fetch failure exits through iea_iacc.
5016fpc_ind:
5017 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5018 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5019 bsr.l _imem_read_word # fetch word displacement
5020
5021 tst.l %d1 # did ifetch fail?
5022 bne.l iea_iacc # yes
5023
5024 mov.w %d0,%a0 # sign extend displacement
5025
5026 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
5027
5028# the extwptr was advanced by 2 (addq above) before the fetch, so it now
# points past the displacement word. need to adjust here.
5029 subq.l &0x2,%a0 # adjust <ea>
5030 rts
5031
5032##########################################################
5033# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034# " " w/ " (base displacement): (bd, PC, Xn) #
5035# PC memory indirect postindexed: ([bd, PC], Xn, od) #
5036# PC memory indirect preindexed: ([bd, PC, Xn], od) #
5037##########################################################
# OUTPUT: a0 = <ea>
# The base is the address of the extension word (EXC_EXTWPTR after the
# fetch, minus 2). If extword bit 8 is set, the rest of the work is done by
# fcalc_mem_ind (a0 = base, d0 = extension word). Otherwise the brief form
# is decoded here: <ea> = base + (Xn.{w,l} << scale) + d8.
# An instruction fetch failure exits through iea_iacc.
5038fpc_ind_ext:
5039 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5040 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5041 bsr.l _imem_read_word # fetch ext word
5042
5043 tst.l %d1 # did ifetch fail?
5044 bne.l iea_iacc # yes
5045
5046 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
5047 subq.l &0x2,%a0 # adjust base back to the ext word's address
5048
5049 btst &0x8,%d0 # is disp only 8 bits?
5050 bne.w fcalc_mem_ind # no (bit 8 set); calc memory indirect
5051
5052 mov.l %d0,L_SCR1(%a6) # store opword
5053
5054 mov.l %d0,%d1 # make extword copy
5055 rol.w &0x4,%d1 # rotate reg num into place
5056 andi.w &0xf,%d1 # extract register number
5057
5058# count on fetch_dreg() not to alter a0...
5059 bsr.l fetch_dreg # fetch index
5060
5061 mov.l %d2,-(%sp) # save d2
5062 mov.l L_SCR1(%a6),%d2 # fetch opword
5063
5064 btst &0xb,%d2 # is index word or long?
5065 bne.b fpii8_long # long
5066 ext.l %d0 # sign extend word index
5067fpii8_long:
5068 mov.l %d2,%d1
5069 rol.w &0x7,%d1 # rotate scale value into place
5070 andi.l &0x3,%d1 # extract scale value
5071
5072 lsl.l %d1,%d0 # shift index by scale
5073
5074 extb.l %d2 # sign extend displacement
5075 add.l %d2,%d0 # disp + index
5076 add.l %d0,%a0 # PC base + (index + disp)
5077
5078 mov.l (%sp)+,%d2 # restore temp register
5079 rts
5080
# fcalc_mem_ind: finish the <ea> calculation for an extension word whose
# bit 8 is set (entered from faddr_ind_ext and fpc_ind_ext).
# INPUT:  d0 = extension word, a0 = base (An or PC value)
# OUTPUT: a0 = <ea>; d2-d5 are saved on entry and restored on every exit.
# Register use after the save:
5081# d2 = index
5082# d3 = base
5083# d4 = od
5084# d5 = extword
5085fcalc_mem_ind:
5086 btst &0x6,%d0 # is the index suppressed?
5087 beq.b fcalc_index # no; go fetch and scale it
5088
5089 movm.l &0x3c00,-(%sp) # save d2-d5
5090
5091 mov.l %d0,%d5 # put extword in d5
5092 mov.l %a0,%d3 # put base in d3
5093
5094 clr.l %d2 # yes, so index = 0
5095 bra.b fbase_supp_ck
5096
5097# index:
5098fcalc_index:
5099 mov.l %d0,L_SCR1(%a6) # save d0 (opword)
5100 bfextu %d0{&16:&4},%d1 # fetch dreg index (extword bits 15-12)
5101 bsr.l fetch_dreg # d0 = index register value
5102
5103 movm.l &0x3c00,-(%sp) # save d2-d5
5104 mov.l %d0,%d2 # put index in d2
5105 mov.l L_SCR1(%a6),%d5 # put extword in d5
5106 mov.l %a0,%d3 # put base in d3 (relies on fetch_dreg leaving a0 intact)
5107
5108 btst &0xb,%d5 # is index word or long?
5109 bne.b fno_ext # long; use as is
5110 ext.l %d2 # sign extend word index
5111
5112fno_ext:
5113 bfextu %d5{&21:&2},%d0 # extract scale (extword bits 10-9)
5114 lsl.l %d0,%d2 # shift index by scale
5115
5116# base address (passed as parameter in d3):
5117# we clear the value here if it should actually be suppressed.
5118fbase_supp_ck:
5119 btst &0x7,%d5 # is the base register suppressed?
5120 beq.b fno_base_sup # no; keep base in d3
5121 clr.l %d3 # yes, so base = 0
5122
5123# base displacement:
5124fno_base_sup:
5125 bfextu %d5{&26:&2},%d0 # get bd size
5126# beq.l fmovm_error # if (size == 0) it's reserved
# (with the check above disabled, size 0 is handled like "no bd")
5127
5128 cmpi.b %d0,&0x2 # compare bd size with 2
5129 blt.b fno_bd # size below 2: no bd to fetch
5130 beq.b fget_word_bd # size 2: word bd; otherwise fall through for long bd
5131
5132 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5133 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5134 bsr.l _imem_read_long # d0 = long bd
5135
5136 tst.l %d1 # did ifetch fail?
5137 bne.l fcea_iacc # yes
5138
5139 bra.b fchk_ind
5140
5141fget_word_bd:
5142 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5143 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5144 bsr.l _imem_read_word # d0 = word bd
5145
5146 tst.l %d1 # did ifetch fail?
5147 bne.l fcea_iacc # yes
5148
5149 ext.l %d0 # sign extend bd
5150
5151fchk_ind:
5152 add.l %d0,%d3 # base += bd
5153
5154# outer displacement:
5155fno_bd:
5156 bfextu %d5{&30:&2},%d0 # is od suppressed?
5157 beq.w faii_bd # field is 0: no memory indirection, <ea> = base + bd + index
5158
5159 cmpi.b %d0,&0x2 # compare od size with 2
5160 blt.b fnull_od # size 1: null od
5161 beq.b fword_od # size 2: word od; otherwise fall through for long od
5162
5163 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5164 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5165 bsr.l _imem_read_long # d0 = long od
5166
5167 tst.l %d1 # did ifetch fail?
5168 bne.l fcea_iacc # yes
5169
5170 bra.b fadd_them
5171
5172fword_od:
5173 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5174 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5175 bsr.l _imem_read_word # d0 = word od
5176
5177 tst.l %d1 # did ifetch fail?
5178 bne.l fcea_iacc # yes
5179
5180 ext.l %d0 # sign extend od
5181 bra.b fadd_them
5182
5183fnull_od:
5184 clr.l %d0 # od = 0
5185
5186fadd_them:
5187 mov.l %d0,%d4 # put od in d4
5188
5189 btst &0x2,%d5 # pre or post indexing?
5190 beq.b fpre_indexed # bit 2 clear: preindexed
5191
# postindexed: <ea> = mem[base + bd] + index + od
5192 mov.l %d3,%a0 # address of the intermediate pointer
5193 bsr.l _dmem_read_long # d0 = intermediate pointer
5194
5195 tst.l %d1 # did dfetch fail?
5196 bne.w fcea_err # yes
5197
5198 add.l %d2,%d0 # <ea> += index
5199 add.l %d4,%d0 # <ea> += od
5200 bra.b fdone_ea
5201
# preindexed: <ea> = mem[base + bd + index] + od
5202fpre_indexed:
5203 add.l %d2,%d3 # preindexing
5204 mov.l %d3,%a0 # address of the intermediate pointer
5205 bsr.l _dmem_read_long # d0 = intermediate pointer
5206
5207 tst.l %d1 # did dfetch fail?
5208 bne.w fcea_err # yes
5209
5210 add.l %d4,%d0 # ea += od
5211 bra.b fdone_ea
5212
5213faii_bd:
5214 add.l %d2,%d3 # ea = (base + bd) + index
5215 mov.l %d3,%d0
5216fdone_ea:
5217 mov.l %d0,%a0 # return <ea> in a0
5218
5219 movm.l (%sp)+,&0x003c # restore d2-d5
5220 rts
5221
5222#########################################################
# error exits for fcalc_mem_ind; both restore d2-d5 first.
# fcea_err: the data read of the intermediate pointer failed. Passes the
# failing address in a0 and 0x0101 in d0 to iea_dacc.
5223fcea_err:
5224 mov.l %d3,%a0 # a0 = address whose read failed
5225
5226 movm.l (%sp)+,&0x003c # restore d2-d5
5227 mov.w &0x0101,%d0 # code handed to iea_dacc (NOTE(review): meaning defined by iea_dacc - confirm)
5228 bra.l iea_dacc
5229
# fcea_iacc: an instruction-stream read of bd/od failed.
5230fcea_iacc:
5231 movm.l (%sp)+,&0x003c # restore d2-d5
5232 bra.l iea_iacc
5233
# fmovm_out_err / fmovm_in_err: error exits that call restore, load a
# direction-specific code into d0, load a0 from L_SCR1(%a6) and branch to
# iea_dacc. NOTE(review): the code values and the contents of L_SCR1
# (presumably the faulting address) are defined outside this section - confirm.
5234fmovm_out_err:
5235 bsr.l restore
5236 mov.w &0x00e1,%d0 # code for the "out" direction
5237 bra.b fmovm_err
5238
5239fmovm_in_err:
5240 bsr.l restore
5241 mov.w &0x0161,%d0 # code for the "in" direction
5242
5243fmovm_err:
5244 mov.l L_SCR1(%a6),%a0 # a0 = value saved in L_SCR1
5245 bra.l iea_dacc
5246
5247#########################################################################
5248# XDEF **************************************************************** #
5249# fmovm_ctrl(): emulate fmovm.l of control registers instr #
5250# #
5251# XREF **************************************************************** #
5252# _imem_read_long() - read longword from memory #
5253# iea_iacc() - _imem_read_long() failed; error recovery #
5254# #
5255# INPUT *************************************************************** #
5256# None #
5257# #
5258# OUTPUT ************************************************************** #
5259# If _imem_read_long() doesn't fail: #
5260# USER_FPCR(a6) = new FPCR value #
5261# USER_FPSR(a6) = new FPSR value #
5262# USER_FPIAR(a6) = new FPIAR value #
5263# #
5264# ALGORITHM *********************************************************** #
5265# Decode the instruction type by looking at the extension word #
5266# in order to see how many control registers to fetch from memory. #
5267# Fetch them using _imem_read_long(). If this fetch fails, exit through #
5268# the special access error exit handler iea_iacc(). #
5269# #
5270# Instruction word decoding: #
5271# #
5272# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
5273# #
5274# WORD1 WORD2 #
5275# 1111 0010 00 111100 100$ $$00 0000 0000 #
5276# #
5277# $$$ (100): FPCR #
5278# (010): FPSR #
5279# (001): FPIAR #
5280# (000): FPIAR #
5281# #
# Only the USER_* slots for the registers selected by the instruction are
# written. Values are read in the order FPCR, FPSR, FPIAR from consecutive
# longwords at EXC_EXTWPTR(a6), which is advanced by 4 per value.
# The high byte of the extension word is compared against 0x9c, 0x98 and
# 0x94; any other value falls through to the FPSR/FPIAR case.
5282#########################################################################
5283
5284 global fmovm_ctrl
5285fmovm_ctrl:
5286 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
5287 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
5288 beq.w fctrl_in_7 # yes
5289 cmpi.b %d0,&0x98 # fpcr & fpsr ?
5290 beq.w fctrl_in_6 # yes
5291 cmpi.b %d0,&0x94 # fpcr & fpiar ?
5292 beq.b fctrl_in_5 # yes
5293
5294# fmovem.l #<data>, fpsr/fpiar
# (also reached by fall-through for every select value not matched above)
5295fctrl_in_3:
5296 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5297 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5298 bsr.l _imem_read_long # fetch FPSR from mem
5299
5300 tst.l %d1 # did ifetch fail?
5301 bne.l iea_iacc # yes
5302
5303 mov.l %d0,USER_FPSR(%a6) # store new FPSR to USER_FPSR(a6)
5304 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5305 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5306 bsr.l _imem_read_long # fetch FPIAR from mem
5307
5308 tst.l %d1 # did ifetch fail?
5309 bne.l iea_iacc # yes
5310
5311 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to USER_FPIAR(a6)
5312 rts
5313
5314# fmovem.l #<data>, fpcr/fpiar
5315fctrl_in_5:
5316 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5317 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5318 bsr.l _imem_read_long # fetch FPCR from mem
5319
5320 tst.l %d1 # did ifetch fail?
5321 bne.l iea_iacc # yes
5322
5323 mov.l %d0,USER_FPCR(%a6) # store new FPCR to USER_FPCR(a6)
5324 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5325 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5326 bsr.l _imem_read_long # fetch FPIAR from mem
5327
5328 tst.l %d1 # did ifetch fail?
5329 bne.l iea_iacc # yes
5330
5331 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to USER_FPIAR(a6)
5332 rts
5333
5334# fmovem.l #<data>, fpcr/fpsr
5335fctrl_in_6:
5336 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5337 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5338 bsr.l _imem_read_long # fetch FPCR from mem
5339
5340 tst.l %d1 # did ifetch fail?
5341 bne.l iea_iacc # yes
5342
5343 mov.l %d0,USER_FPCR(%a6) # store new FPCR to USER_FPCR(a6)
5344 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5345 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5346 bsr.l _imem_read_long # fetch FPSR from mem
5347
5348 tst.l %d1 # did ifetch fail?
5349 bne.l iea_iacc # yes
5350
5351 mov.l %d0,USER_FPSR(%a6) # store new FPSR to USER_FPSR(a6)
5352 rts
5353
5354# fmovem.l #<data>, fpcr/fpsr/fpiar
5355fctrl_in_7:
5356 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5357 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5358 bsr.l _imem_read_long # fetch FPCR from mem
5359
5360 tst.l %d1 # did ifetch fail?
5361 bne.l iea_iacc # yes
5362
5363 mov.l %d0,USER_FPCR(%a6) # store new FPCR to USER_FPCR(a6)
5364 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5365 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5366 bsr.l _imem_read_long # fetch FPSR from mem
5367
5368 tst.l %d1 # did ifetch fail?
5369 bne.l iea_iacc # yes
5370
5371 mov.l %d0,USER_FPSR(%a6) # store new FPSR to USER_FPSR(a6)
5372 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5373 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5374 bsr.l _imem_read_long # fetch FPIAR from mem
5375
5376 tst.l %d1 # did ifetch fail?
5377 bne.l iea_iacc # yes
5378
5379 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to USER_FPIAR(a6)
5380 rts
5381
5382##########################################################################
5383
5384#########################################################################
5385# XDEF **************************************************************** #
5386# addsub_scaler2(): scale inputs to fadd/fsub such that no #
5387# OVFL/UNFL exceptions will result #
5388# #
5389# XREF **************************************************************** #
5390# norm() - normalize mantissa after adjusting exponent #
# scale_to_zero_src() / scale_to_zero_dst() - set larger exponent to 0x3fff
5391# #
5392# INPUT *************************************************************** #
5393# FP_SRC(a6) = fp op1(src) #
5394# FP_DST(a6) = fp op2(dst) #
# NOTE(review): the code reads the src through a0 and the dst through a1,
# and also reads STAG(a6)/DTAG(a6); presumably a0/a1 point at
# FP_SRC/FP_DST - confirm against the callers.
5395# #
5396# OUTPUT ************************************************************** #
5397# FP_SRC(a6) = fp op1 scaled(src) #
5398# FP_DST(a6) = fp op2 scaled(dst) #
5399# d0 = scale amount #
# NOTE(review): the scaled operands are written to FP_SCR0(a6) (src) and
# FP_SCR1(a6) (dst); the code in this routine does not store back through
# a0/a1.
5400# #
5401# ALGORITHM *********************************************************** #
5402# If the DST exponent is > the SRC exponent, set the DST exponent #
5403# equal to 0x3fff and scale the SRC exponent by the value that the #
5404# DST exponent was scaled by. If the SRC exponent is greater or equal, #
5405# do the opposite. Return this scale factor in d0. #
5406# If the two exponents differ by > the number of mantissa bits #
5407# plus two, then set the smallest exponent to a very small value as a #
5408# quick shortcut. #
5409# #
5410#########################################################################
5411
5412 global addsub_scaler2
5413addsub_scaler2:
5414 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy src mantissa (hi) to FP_SCR0
5415 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) # copy dst mantissa (hi) to FP_SCR1
5416 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy src mantissa (lo)
5417 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) # copy dst mantissa (lo)
5418 mov.w SRC_EX(%a0),%d0 # d0 = src {sgn,exp}
5419 mov.w DST_EX(%a1),%d1 # d1 = dst {sgn,exp}
5420 mov.w %d0,FP_SCR0_EX(%a6) # copy src {sgn,exp}
5421 mov.w %d1,FP_SCR1_EX(%a6) # copy dst {sgn,exp}
5422
5423 andi.w &0x7fff,%d0 # strip src sign
5424 andi.w &0x7fff,%d1 # strip dst sign
5425 mov.w %d0,L_SCR1(%a6) # store src exponent
5426 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
5427
5428 cmp.w %d0, %d1 # is src exp >= dst exp?
5429 bge.l src_exp_ge2 # yes
5430
5431# dst exp is > src exp; scale dst to exp = 0x3fff
5432dst_exp_gt2:
5433 bsr.l scale_to_zero_dst # d0 = scale factor applied to dst
5434 mov.l %d0,-(%sp) # save scale factor
5435
5436 cmpi.b STAG(%a6),&DENORM # is src denormalized?
5437 bne.b cmpexp12 # no
5438
5439 lea FP_SCR0(%a6),%a0 # pass ptr to src copy
5440 bsr.l norm # normalize the denorm; result is new exp
5441 neg.w %d0 # new exp = -(shft val)
5442 mov.w %d0,L_SCR1(%a6) # insert new src exp
5443
5444cmpexp12:
5445 mov.w 2+L_SCR1(%a6),%d0 # d0 = dst (larger) exponent
5446 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5447
5448 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
5449 bge.b quick_scale12 # yes; src is negligible, take the shortcut
5450
5451 mov.w L_SCR1(%a6),%d0 # d0 = src exponent
5452 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
5453 mov.w FP_SCR0_EX(%a6),%d1
5454 and.w &0x8000,%d1 # keep only the src sign bit
5455 or.w %d1,%d0 # concat {sgn,new exp}
5456 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
5457
5458 mov.l (%sp)+,%d0 # return SCALE factor
5459 rts
5460
5461quick_scale12:
5462 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5463 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
5464
5465 mov.l (%sp)+,%d0 # return SCALE factor
5466 rts
5467
5468# src exp is >= dst exp; scale src to exp = 0x3fff
5469src_exp_ge2:
5470 bsr.l scale_to_zero_src # d0 = scale factor applied to src
5471 mov.l %d0,-(%sp) # save scale factor
5472
5473 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
5474 bne.b cmpexp22 # no
5475 lea FP_SCR1(%a6),%a0 # pass ptr to dst copy
5476 bsr.l norm # normalize the denorm; result is new exp
5477 neg.w %d0 # new exp = -(shft val)
5478 mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
5479
5480cmpexp22:
5481 mov.w L_SCR1(%a6),%d0 # d0 = src (larger or equal) exponent
5482 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5483
5484 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
5485 bge.b quick_scale22 # yes; dst is negligible, take the shortcut
5486
5487 mov.w 2+L_SCR1(%a6),%d0 # d0 = dst exponent
5488 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
5489 mov.w FP_SCR1_EX(%a6),%d1
5490 andi.w &0x8000,%d1 # keep only the dst sign bit
5491 or.w %d1,%d0 # concat {sgn,new exp}
5492 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
5493
5494 mov.l (%sp)+,%d0 # return SCALE factor
5495 rts
5496
5497quick_scale22:
5498 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5499 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
5500
5501 mov.l (%sp)+,%d0 # return SCALE factor
5502 rts
5503
5504##########################################################################
5505
5506#########################################################################
5507# XDEF **************************************************************** #
5508# scale_to_zero_src(): scale the exponent of extended precision #
5509# value at FP_SCR0(a6). #
5510# #
5511# XREF **************************************************************** #
5512# norm() - normalize the mantissa if the operand was a DENORM #
5513# #
5514# INPUT *************************************************************** #
5515# FP_SCR0(a6) = extended precision operand to be scaled #
# STAG(a6) = operand tag (compared against DENORM)
5516# #
5517# OUTPUT ************************************************************** #
5518# FP_SCR0(a6) = scaled extended precision operand #
5519# d0 = scale value #
5520# #
5521# ALGORITHM *********************************************************** #
5522# Set the exponent of the input operand to 0x3fff. Save the value #
5523# of the difference between the original and new exponent. Then, #
5524# normalize the operand if it was a DENORM. Add this normalization #
5525# value to the previous value. Return the result. #
# In the code below: scale = 0x3fff - exp, where exp is the original
# biased exponent, or -(shift count returned by norm) for a DENORM.
5526# #
5527#########################################################################
5528
5529 global scale_to_zero_src
5530scale_to_zero_src:
5531 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5532 mov.w %d1,%d0 # make a copy
5533
5534 andi.l &0x7fff,%d1 # extract operand's exponent
5535
5536 andi.w &0x8000,%d0 # extract operand's sgn
5537 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
5538
5539 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
5540
5541 cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
5542 beq.b stzs_denorm # normalize the DENORM
5543
5544stzs_norm:
5545 mov.l &0x3fff,%d0
5546 sub.l %d1,%d0 # scale = BIAS + (-exp)
5547
5548 rts
5549
5550stzs_denorm:
5551 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5552 bsr.l norm # normalize denorm
5553 neg.l %d0 # new exponent = -(shft val)
5554 mov.l %d0,%d1 # d1 = exponent used by stzs_norm
5555 bra.b stzs_norm # finish scaling
5556
5557###
5558
5559#########################################################################
5560# XDEF **************************************************************** #
5561# scale_sqrt(): scale the input operand exponent so a subsequent #
5562# fsqrt operation won't take an exception. #
5563# #
5564# XREF **************************************************************** #
5565# norm() - normalize the mantissa if the operand was a DENORM #
5566# #
5567# INPUT *************************************************************** #
5568# FP_SCR0(a6) = extended precision operand to be scaled #
5569# #
5570# OUTPUT ************************************************************** #
5571# FP_SCR0(a6) = scaled extended precision operand #
5572# d0 = scale value #
5573# #
5574# ALGORITHM *********************************************************** #
5575# If the input operand is a DENORM, normalize it. #
5576# If the exponent of the input operand is even, set the exponent #
5577# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the #
5578# exponent of the input operand is odd, set the exponent to 0x3fff and #
5579# return a scale factor of "(0x3fff-exp)/2". #
5580# #
5581#########################################################################
5582
5583 global scale_sqrt
5584scale_sqrt:
# Rewrites the exponent of FP_SCR0(a6) to 0x3fff (odd input exponent) or
# 0x3ffe (even input exponent), keeping the sign, and returns in d0 half
# of the difference between the new and the original exponent.
5585 cmpi.b STAG(%a6),&DENORM # is the operand tagged DENORM?
5586 beq.b ss_denorm # yes; normalize the mantissa first
5587
5588 mov.w FP_SCR0_EX(%a6),%d1 # d1.w = operand's {sgn,exp}
5589 andi.l &0x7fff,%d1 # d1.l = exponent only (upper bits cleared)
5590
5591 andi.w &0x8000,FP_SCR0_EX(%a6) # leave only the sign in memory
5592
5593 btst &0x0,%d1 # is exp even or odd?
5594 beq.b ss_norm_even
5595
5596 ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: new exponent = 0x3fff
5597
5598 mov.l &0x3fff,%d0
5599 sub.l %d1,%d0 # d0 = 0x3fff - exp
5600 asr.l &0x1,%d0 # halve it (arithmetic shift keeps the sign)
5601 rts
5602
5603ss_norm_even:
5604 ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: new exponent = 0x3ffe
5605
5606 mov.l &0x3ffe,%d0
5607 sub.l %d1,%d0 # d0 = 0x3ffe - exp
5608 asr.l &0x1,%d0 # halve it (arithmetic shift keeps the sign)
5609 rts
5610
5611ss_denorm:
5612 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5613 bsr.l norm # d0 = number of bits the mantissa was shifted
5614
5615 btst &0x0,%d0 # is the shift count even or odd?
5616 beq.b ss_denorm_even
5617
# NOTE(review): only ori is used here, so the exponent field is presumably
# already zero for a DENORM - confirm against the tagging code.
5618 ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: new exponent = 0x3fff
5619
5620 add.l &0x3fff,%d0 # d0 = 0x3fff + shift count
5621 asr.l &0x1,%d0 # divide scale factor by 2
5622 rts
5623
5624ss_denorm_even:
5625 ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: new exponent = 0x3ffe
5626
5627 add.l &0x3ffe,%d0 # d0 = 0x3ffe + shift count
5628 asr.l &0x1,%d0 # divide scale factor by 2
5629 rts
5630
5631###
5632
5633#########################################################################
5634# XDEF **************************************************************** #
5635# scale_to_zero_dst(): scale the exponent of extended precision #
5636# value at FP_SCR1(a6). #
5637# #
5638# XREF **************************************************************** #
5639# norm() - normalize the mantissa if the operand was a DENORM #
5640# #
5641# INPUT *************************************************************** #
5642# FP_SCR1(a6) = extended precision operand to be scaled #
5643# #
5644# OUTPUT ************************************************************** #
5645# FP_SCR1(a6) = scaled extended precision operand #
5646# d0 = scale value #
5647# #
5648# ALGORITHM *********************************************************** #
5649# Set the exponent of the input operand to 0x3fff. Save the value #
5650# of the difference between the original and new exponent. Then, #
5651# normalize the operand if it was a DENORM. Add this normalization #
5652# value to the previous value. Return the result. #
5653# #
5654#########################################################################
5655
5656 global scale_to_zero_dst
scale_to_zero_dst:
# In : FP_SCR1(a6) = extended precision destination operand, DTAG(a6) = its tag
# Out: FP_SCR1(a6) exponent forced to 0x3fff (sign kept), d0 = scale value
	mov.w		FP_SCR1_EX(%a6),%d0	# d0.w = operand's {sgn,exp}
	mov.w		%d0,%d1
	andi.l		&0x7fff,%d1		# d1.l = exponent, upper bits cleared

	andi.w		&0x8000,%d0		# keep only the sign ...
	or.w		&0x3fff,%d0		# ... and force the exponent to 0x3fff
	mov.w		%d0,FP_SCR1_EX(%a6)	# write {sgn,0x3fff} back

	cmpi.b		DTAG(%a6),&DENORM	# is the destination a DENORM?
	bne.b		stzd_norm		# no; d1 already holds the exponent

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# a0 -> operand for norm()
	bsr.l		norm			# d0 = number of bits the mantissa moved
	neg.l		%d0
	mov.l		%d0,%d1			# -(shift count) replaces the exponent

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = 0x3fff - d1
	rts
5682
5683##########################################################################
5684
5685#########################################################################
5686# XDEF **************************************************************** #
5687# res_qnan(): return default result w/ QNAN operand for dyadic #
5688# res_snan(): return default result w/ SNAN operand for dyadic #
5689# res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5690# res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5691# #
5692# XREF **************************************************************** #
5693# None #
5694# #
5695# INPUT *************************************************************** #
5696# FP_SRC(a6) = pointer to extended precision src operand #
5697# FP_DST(a6) = pointer to extended precision dst operand #
5698# #
5699# OUTPUT ************************************************************** #
5700# fp0 = default result #
5701# #
5702# ALGORITHM *********************************************************** #
5703# If either operand (but not both operands) of an operation is a #
5704# nonsignalling NAN, then that NAN is returned as the result. If both #
5705# operands are nonsignalling NANs, then the destination operand #
5706# nonsignalling NAN is returned as the result. #
5707# If either operand to an operation is a signalling NAN (SNAN), #
5708# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5709# enable bit is set in the FPCR, then the trap is taken and the #
5710# destination is not modified. If the SNAN trap enable bit is not set, #
5711# then the SNAN is converted to a nonsignalling NAN (by setting the #
5712# SNAN bit in the operand to one), and the operation continues as #
5713# described in the preceding paragraph, for nonsignalling NANs. #
5714# Make sure the appropriate FPSR bits are set before exiting. #
5715# #
5716#########################################################################
5717
5718 global res_qnan
5719 global res_snan
5720res_qnan:
5721res_snan:
# Dyadic entry: pick the NAN operand to return. The destination is checked
# first, so a dst NAN wins over a src NAN.
5722 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
5723 beq.b dst_snan2
5724 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
5725 beq.b dst_qnan2
5726src_nan:
5727 cmp.b STAG(%a6), &QNAN # dst is not a NAN; is the src a QNAN?
5728 beq.b src_qnan2
# src is not tagged QNAN: fall through and handle it as an SNAN
5729 global res_snan_1op
5730res_snan_1op:
5731src_snan2:
5732 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
5733 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN + SNAN + AIOP
5734 lea FP_SRC(%a6), %a0 # result comes from the src operand
5735 bra.b nan_comp
5736 global res_qnan_1op
5737res_qnan_1op:
5738src_qnan2:
5739 or.l &nan_mask, USER_FPSR(%a6) # QNAN: only the NAN flag is set
5740 lea FP_SRC(%a6), %a0 # result comes from the src operand
5741 bra.b nan_comp
5742dst_snan2:
5743 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN + SNAN + AIOP
5744 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
5745 lea FP_DST(%a6), %a0 # result comes from the dst operand
5746 bra.b nan_comp
5747dst_qnan2:
5748 lea FP_DST(%a6), %a0 # result is the dst QNAN ...
5749 cmp.b STAG(%a6), &SNAN # ... but a src SNAN must still be reported
5750 bne nan_done
5751 or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
5752nan_done:
5753 or.l &nan_mask, USER_FPSR(%a6)
5754nan_comp:
# a0 points at the operand chosen as the result
5755 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
5756 beq.b nan_not_neg
5757 or.l &neg_mask, USER_FPSR(%a6) # yes; record the negative flag
5758nan_not_neg:
5759 fmovm.x (%a0), &0x80 # load the chosen NAN into fp0
5760 rts
5761
5762#########################################################################
5763# XDEF **************************************************************** #
5764# res_operr(): return default result during operand error #
5765# #
5766# XREF **************************************************************** #
5767# None #
5768# #
5769# INPUT *************************************************************** #
5770# None #
5771# #
5772# OUTPUT ************************************************************** #
5773# fp0 = default operand error result #
5774# #
5775# ALGORITHM *********************************************************** #
5776# A nonsignalling NAN is returned as the default result when #
5777# an operand error occurs for the following cases: #
5778# #
5779# Multiply: (Infinity x Zero) #
5780# Divide : (Zero / Zero) || (Infinity / Infinity) #
5781# #
5782#########################################################################
5783
5784 global res_operr
5785res_operr:
# Flags NAN, OPERR and AIOP in USER_FPSR and returns the constant at
# nan_return in fp0.
5786 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5787 fmovm.x nan_return(%pc), &0x80 # fp0 = default NAN below
5788 rts
5789
# Default result: exponent 0x7fff with an all-ones mantissa.
5790nan_return:
5791 long 0x7fff0000, 0xffffffff, 0xffffffff
5792
5793#########################################################################
5794# XDEF **************************************************************** #
5795# _denorm(): denormalize an intermediate result #
5796# #
5797# XREF **************************************************************** #
5798# None #
5799# #
5800# INPUT *************************************************************** #
5801# a0 = points to the operand to be denormalized #
5802# (in the internal extended format) #
5803# #
5804# d0 = rounding precision #
5805# #
5806# OUTPUT ************************************************************** #
5807# a0 = pointer to the denormalized result #
5808# (in the internal extended format) #
5809# #
5810# d0 = guard,round,sticky #
5811# #
5812# ALGORITHM *********************************************************** #
5813# According to the exponent underflow threshold for the given #
5814# precision, shift the mantissa bits to the right in order to raise the #
5815# exponent of the operand to the threshold value. While shifting the #
5816# mantissa bits right, maintain the value of the guard, round, and #
5817# sticky bits. #
5818# other notes: #
5819# (1) _denorm() is called by the underflow routines #
5820# (2) _denorm() does NOT affect the status register #
5821# #
5822#########################################################################
5823
5824#
5825# table of exponent threshold values for each precision
5826#
# Indexed by (rounding precision >> 2): entry 0 = 0x0, then sgl, then dbl.
5827tbl_thresh:
5828 short 0x0
5829 short sgl_thresh
5830 short dbl_thresh
5831
5832 global _denorm
5833_denorm:
5834#
5835# Load the exponent threshold for the precision selected and check
5836# to see if (threshold - exponent) is > 65 in which case we can
5837# simply set the sticky bit and zero the mantissa. otherwise
5838# we have to call the denormalization routine.
5839#
# NOTE(review): d0.w is used as the table index after a byte-sized shift,
# so the caller presumably passes d0 with bits 15:8 clear - confirm.
5839 lsr.b &0x2, %d0 # shift prec to lo bits
5840 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5841 mov.w %d1, %d0 # copy d1 into d0
5842 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
5843 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
5844 bpl.b denorm_set_stky # yes; everything shifts out
5845
5846 clr.l %d0 # clear g,r,s
5847 btst &inex2_bit, FPSR_EXCEPT(%a6) # no; was INEX2 already set?
5848 beq.b denorm_call # no; start with g,r,s = 0
5849 bset &29, %d0 # yes; set sticky bit
5850
5851denorm_call:
5852 bsr.l dnrm_lp # denormalize; d1 = threshold, d0 = initial g,r,s
5853 rts
5854
5855#
5856# all bits would have been shifted off during the denorm so simply
5857# set the sticky bit and clear the entire mantissa.
5858#
5859denorm_set_stky:
5860 mov.l &0x20000000, %d0 # set sticky bit in return value
5861 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
5862 clr.l FTEMP_HI(%a0) # hi(mantissa) = 0
5863 clr.l FTEMP_LO(%a0) # lo(mantissa) = 0
5864 rts
5866
5867# #
5868# dnrm_lp(): denormalize exponent/mantissa to specified threshold #
5869# #
5870# INPUT: #
5871# %a0 : points to the operand to be denormalized #
5872# %d0{31:29} : initial guard,round,sticky #
5873# %d1{15:0} : denormalization threshold #
5874# OUTPUT: #
5875# %a0 : points to the denormalized operand #
5876# %d0{31:29} : final guard,round,sticky #
5877# #
5878
5879# *** Local Equates *** #
# NOTE(review): the code below relies on GRS sitting immediately after
# FTEMP_LO2 in memory, i.e. on L_SCR2 following L_SCR1 - confirm the layout.
5880set GRS, L_SCR2 # g,r,s temp storage
5881set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
5882
5883 global dnrm_lp
5884dnrm_lp:
5885
5886#
5887# make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888# in memory so as to make the bitfield extraction for denormalization easier.
5889#
5890 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891 mov.l %d0, GRS(%a6) # place g,r,s after it
5892
5893#
5894# check to see how much less than the underflow threshold the operand
5895# exponent is, then dispatch on the shift amount.
5896#
5897 mov.l %d1, %d0 # d0 = denorm threshold (kept for the cases)
5898 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
5899 ble.b dnrm_no_lp # d1 <= 0: nothing to shift
5900 cmpi.w %d1, &0x20 # is ( 0 < d1 < 32) ?
5901 blt.b case_1 # yes
5902 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
5903 blt.b case_2 # yes
5904 bra.w case_3 # (d1 >= 64)
5905
5906#
5907# No denormalization necessary
5908#
5909dnrm_no_lp:
5910 mov.l GRS(%a6), %d0 # return the caller's g,r,s unchanged
5911 rts
5912
5913#
5914# case (0<d1<32)
5915#
5916# %d0 = denorm threshold
5917# %d1 = "n" = amt to shift
5918#
5919# ---------------------------------------------------------
5920# | FTEMP_HI | FTEMP_LO |grs000.........000|
5921# ---------------------------------------------------------
5922# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923# \ \ \ \
5924# \ \ \ \
5925# \ \ \ \
5926# \ \ \ \
5927# \ \ \ \
5928# \ \ \ \
5929# \ \ \ \
5930# \ \ \ \
5931# <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932# ---------------------------------------------------------
5933# |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
5934# ---------------------------------------------------------
5935#
# Shift amount n in d1 with 0 < n < 32; d0 = denorm threshold on entry.
5936case_1:
5937 mov.l %d2, -(%sp) # create temp storage
5938
5939 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5940 mov.l &32, %d0
5941 sub.w %d1, %d0 # %d0 = 32 - %d1
5942
# For n >= 29 the top byte of the saved g,r,s is OR-ed into the lowest byte
# of the FTEMP_LO copy. NOTE(review): presumably so that incoming g,r,s bits
# that would fall outside the 32-bit window extracted below still reach the
# sticky test - confirm.
5943 cmpi.w %d1, &29 # is shft amt >= 29
5944 blt.b case1_extract # no; no fix needed
5945 mov.b GRS(%a6), %d2
5946 or.b %d2, 3+FTEMP_LO2(%a6)
5947
# The second and third extractions start (32 - n) bits into a longword and
# take 32 bits, so each one runs into the longword that follows in memory.
5948case1_extract:
5949 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952
5953 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
5954 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
5955
5956 bftst %d0{&2:&30} # anything below G and R?
5957 beq.b case1_sticky_clear # no; go finish
5958 bset &rnd_stky_bit, %d0 # yes; set sticky bit
5959
5960case1_sticky_clear:
5961 and.l &0xe0000000, %d0 # clear all but G,R,S
5962 mov.l (%sp)+, %d2 # restore temp register
5963 rts
5964
5965#
5966# case (32<=d1<64)
5967#
5968# %d0 = denorm threshold
5969# %d1 = "n" = amt to shift
5970#
5971# ---------------------------------------------------------
5972# | FTEMP_HI | FTEMP_LO |grs000.........000|
5973# ---------------------------------------------------------
5974# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975# \ \ \
5976# \ \ \
5977# \ \ -------------------
5978# \ -------------------- \
5979# ------------------- \ \
5980# \ \ \
5981# \ \ \
5982# \ \ \
5983# <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984# ---------------------------------------------------------
5985# |0...............0|0....0| NEW_LO |grs |
5986# ---------------------------------------------------------
5987#
# Shift amount n in d1 with 32 <= n < 64; d0 = denorm threshold on entry.
5988case_2:
5989 mov.l %d2, -(%sp) # create temp storage
5990
5991 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5992 subi.w &0x20, %d1 # %d1 = n - 32, so 0 <= %d1 < 32
5993 mov.l &0x20, %d0
5994 sub.w %d1, %d0 # %d0 = 32 - %d1
5995
5996# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997# the number of bits to check for the sticky detect.
5998# it only plays a role in shift amounts of 61-63.
5999 mov.b GRS(%a6), %d2
6000 or.b %d2, 3+FTEMP_LO2(%a6)
6001
6002 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004
6005 bftst %d1{&2:&30} # anything below G and R in the window?
6006 bne.b case2_set_sticky # yes; set sticky bit
6007 bftst FTEMP_LO2(%a6){%d0:&31} # anything left in the FTEMP_LO copy?
6008 bne.b case2_set_sticky # yes; set sticky bit
6009
6010 mov.l %d1, %d0 # move new G,R,S to %d0
6011 bra.b case2_end
6012
6013case2_set_sticky:
6014 mov.l %d1, %d0 # move new G,R,S to %d0
6015 bset &rnd_stky_bit, %d0 # set sticky bit
6016
6017case2_end:
6018 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
6019 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
6020 and.l &0xe0000000, %d0 # clear all but G,R,S
6021
6022 mov.l (%sp)+,%d2 # restore temp register
6023 rts
6024
6025#
6026# case (d1>=64)
6027#
6028# %d0 = denorm threshold
6029# %d1 = amt to shift
6030#
# Shift amount in d1 is >= 64; d0 = denorm threshold on entry.
6031case_3:
6032 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
6033
6034 cmpi.w %d1, &65 # compare shift amt with 65
6035 blt.b case3_64 # less; it's == 64
6036 beq.b case3_65 # equal; it's == 65
6037
6038#
6039# case (d1>65)
6040#
6041# Shift value is > 65 and out of range. All bits are shifted off.
6042# Return a zero mantissa with the sticky bit set
6043#
6044 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6045 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6046 mov.l &0x20000000, %d0 # set sticky bit
6047 rts
6048
6049#
6050# case (d1 == 64)
6051#
6052# ---------------------------------------------------------
6053# | FTEMP_HI | FTEMP_LO |grs000.........000|
6054# ---------------------------------------------------------
6055# <-------(32)------>
6056# \ \
6057# \ \
6058# \ \
6059# \ ------------------------------
6060# ------------------------------- \
6061# \ \
6062# \ \
6063# \ \
6064# <-------(32)------>
6065# ---------------------------------------------------------
6066# |0...............0|0................0|grs |
6067# ---------------------------------------------------------
6068#
# Shift of exactly 64: the top two bits of hi(mantissa) become G and R,
# the remaining 30 bits only feed the sticky test.
6069case3_64:
6070 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6071 mov.l %d0, %d1 # make a copy
6072 and.l &0xc0000000, %d0 # extract G,R
6073 and.l &0x3fffffff, %d1 # extract other bits (sets Z for case3_complete)
6074
6075 bra.b case3_complete # bra leaves the condition codes intact
6076
6077#
6078# case (d1 == 65)
6079#
6080# ---------------------------------------------------------
6081# | FTEMP_HI | FTEMP_LO |grs000.........000|
6082# ---------------------------------------------------------
6083# <-------(32)------>
6084# \ \
6085# \ \
6086# \ \
6087# \ ------------------------------
6088# -------------------------------- \
6089# \ \
6090# \ \
6091# \ \
6092# <-------(31)----->
6093# ---------------------------------------------------------
6094# |0...............0|0................0|0rs |
6095# ---------------------------------------------------------
6096#
case3_65:
# Shift of exactly 65: the top bit of hi(mantissa) becomes R, G is zero and
# the remaining 31 bits only feed the sticky test in case3_complete.
#
# %d1 must be loaded with hi(mantissa) before it is masked. Without the copy
# it still holds the shift amount (65), so the "other bits" test below is
# always non-zero and the sticky bit is always set.
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits (sets Z for case3_complete)
6102
6103case3_complete:
6104# last operation done was an "and" of the bits shifted off so the condition
6105# codes are already set so branch accordingly. %d0 holds the new G,R bits.
6106 bne.b case3_set_sticky # leftover hi bits set; go set new sticky
6107 tst.l FTEMP_LO(%a0) # any bits set in lo(mantissa)?
6108 bne.b case3_set_sticky # yes; go set new sticky
6109 tst.b GRS(%a6) # any of the incoming g,r,s set?
6110 bne.b case3_set_sticky # yes; go set new sticky
6111
6112#
6113# no bits were shifted off so don't set the sticky bit.
6114# the guard and round bits are returned in %d0 as extracted and
6115# the entire mantissa is zero.
6116#
6117 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6118 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6119 rts
6120
6121#
6122# some bits were shifted off so set the sticky bit.
6123# the entire mantissa is zero.
6124#
6125case3_set_sticky:
6126 bset &rnd_stky_bit,%d0 # set new sticky bit
6127 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6128 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6129 rts
6130
6131#########################################################################
6132# XDEF **************************************************************** #
6133# _round(): round result according to precision/mode #
6134# #
6135# XREF **************************************************************** #
6136# None #
6137# #
6138# INPUT *************************************************************** #
6139# a0 = ptr to input operand in internal extended format #
6140# d1(hi) = contains rounding precision: #
6141# ext = $0000xxxx #
6142# sgl = $0004xxxx #
6143# dbl = $0008xxxx #
6144# d1(lo) = contains rounding mode: #
6145# RN = $xxxx0000 #
6146# RZ = $xxxx0001 #
6147# RM = $xxxx0002 #
6148# RP = $xxxx0003 #
6149# d0{31:29} = contains the g,r,s bits (extended) #
6150# #
6151# OUTPUT ************************************************************** #
6152# a0 = pointer to rounded result #
6153# #
6154# ALGORITHM *********************************************************** #
6155# On return the value pointed to by a0 is correctly rounded, #
6156# a0 is preserved and the g-r-s bits in d0 are cleared. #
6157# The result is not typed - the tag field is invalid. The #
6158# result is still in the internal extended format. #
6159# #
6160# The INEX bit of USER_FPSR will be set if the rounded result was #
6161# inexact (i.e. if any of the g-r-s bits were set). #
6162# #
6163#########################################################################
6164
6165 global _round
6166_round:
6167#
6168# ext_grs() looks at the rounding precision and sets the appropriate
6169# G,R,S bits.
6170# If (G,R,S == 0) then result is exact and round is done, else set
6171# the inex flag in status reg and continue.
6172#
6173 bsr.l ext_grs # d0 = G,R,S for the selected precision
6174
6175 tst.l %d0 # are G,R,S zero?
6176 beq.w truncate # yes; only the precision mask is applied
6177
6178 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179
6180#
6181# Use rounding mode as an index into a jump table for these modes.
6182# All of the following assumes grs != 0.
6183#
# d1.w is the rounding mode (0..3); the table holds offsets from tbl_mode.
6184 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
6186
# Order matches the mode encoding in the header: RN, RZ, RM, RP.
6187tbl_mode:
6188 short rnd_near - tbl_mode
6189 short truncate - tbl_mode # RZ always truncates
6190 short rnd_mnus - tbl_mode
6191 short rnd_plus - tbl_mode
6192
6193#################################################################
6194# ROUND PLUS INFINITY #
6195# #
6196# If sign of fp number = 0 (positive), then add 1 to l. #
6197#################################################################
6198rnd_plus:
6199 tst.b FTEMP_SGN(%a0) # check for sign
6200 bmi.w truncate # negative: rounding toward +inf truncates
6201
# positive and inexact: always round the magnitude up
6202 mov.l &0xffffffff, %d0 # non-zero d0 keeps the add_* code from clearing l
6203 swap %d1 # set up d1 for round prec.
6204
6205 cmpi.b %d1, &s_mode # is prec = sgl?
6206 beq.w add_sgl # yes
6207 bgt.w add_dbl # no; it's dbl
6208 bra.w add_ext # no; it's ext
6209
6210#################################################################
6211# ROUND MINUS INFINITY #
6212# #
6213# If sign of fp number = 1 (negative), then add 1 to l. #
6214#################################################################
6215rnd_mnus:
6216 tst.b FTEMP_SGN(%a0) # check for sign
6217 bpl.w truncate # positive: rounding toward -inf truncates
6218
# negative and inexact: always round the magnitude up
6219 mov.l &0xffffffff, %d0 # non-zero d0 keeps the add_* code from clearing l
6220 swap %d1 # set up d1 for round prec.
6221
6222 cmpi.b %d1, &s_mode # is prec = sgl?
6223 beq.w add_sgl # yes
6224 bgt.w add_dbl # no; it's dbl
6225 bra.w add_ext # no; it's ext
6226
6227#################################################################
6228# ROUND NEAREST #
6229# #
6230# If (g=1), then add 1 to l and if (r=s=0), then clear l #
6231# Note that this will round to even in case of a tie. #
6232#################################################################
6233rnd_near:
6234 asl.l &0x1, %d0 # shift g-bit to c-bit; d0 now holds only r,s
6235 bcc.w truncate # g = 0: below half, so truncate
6236
# g = 1: add 1 to l; the add_* code clears l again when r = s = 0 (tie)
6237 swap %d1 # set up d1 for round prec.
6238
6239 cmpi.b %d1, &s_mode # is prec = sgl?
6240 beq.w add_sgl # yes
6241 bgt.w add_dbl # no; it's dbl
6242 bra.w add_ext # no; it's ext
6243
6244# *** LOCAL EQUATES ***
6245set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
6246set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
6247
6248#########################
6249# ADD SINGLE #
6250#########################
# Adds one unit at the single-precision l-bit of hi(mantissa). d0 = remaining
# r,s bits (or all ones when the caller forces a round-up).
6251add_sgl:
6252 add.l &ad_1_sgl, FTEMP_HI(%a0)
6253 bcc.b scc_clr # no mantissa overflow
# carry out of the mantissa: rotate it back in from the top, one word at a
# time through the extend bit, and bump the exponent
6254 roxr.w FTEMP_HI(%a0) # carry enters the upper word
6255 roxr.w FTEMP_HI+2(%a0) # bit shifted out enters the lower word
6256 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
6257scc_clr:
6258 tst.l %d0 # test for rs = 0
6259 bne.b sgl_done
6260 and.w &0xfe00, FTEMP_HI+2(%a0) # tie: clear the l-bit (round to even)
# also entered from truncate for single precision
6261sgl_done:
6262 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263 clr.l FTEMP_LO(%a0) # lo(mantissa) = 0
6264 rts
6265
6266#########################
6267# ADD EXTENDED #
6268#########################
# Adds one unit at the extended-precision l-bit (bit 0 of lo(mantissa)).
# d0 = remaining r,s bits (or all ones when the caller forces a round-up).
6269add_ext:
6270 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
6271 bcc.b xcc_clr # no carry out of lo(mantissa)
6272 addq.l &1,FTEMP_HI(%a0) # propagate carry
6273 bcc.b xcc_clr # no carry out of hi(mantissa)
# carry out of the whole mantissa: rotate it back in from the top, one word
# at a time through the extend bit, and bump the exponent
6274 roxr.w FTEMP_HI(%a0) # carry enters the top word
6275 roxr.w FTEMP_HI+2(%a0) # continue through the next word
6276 roxr.w FTEMP_LO(%a0)
6277 roxr.w FTEMP_LO+2(%a0)
6278 add.w &0x1,FTEMP_EX(%a0) # and inc exp
6279xcc_clr:
6280 tst.l %d0 # test rs = 0
6281 bne.b add_ext_done
6282 and.b &0xfe,FTEMP_LO+3(%a0) # tie: clear the l bit (round to even)
6283add_ext_done:
6284 rts
6285
6286#########################
6287# ADD DOUBLE #
6288#########################
# Adds one unit at the double-precision l-bit of lo(mantissa). d0 = remaining
# r,s bits (or all ones when the caller forces a round-up).
6289add_dbl:
6290 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291 bcc.b dcc_clr # no carry
6292 addq.l &0x1, FTEMP_HI(%a0) # propagate carry
6293 bcc.b dcc_clr # no carry
6294
# carry out of the whole mantissa: rotate it back in from the top, one word
# at a time through the extend bit, and bump the exponent
6295 roxr.w FTEMP_HI(%a0) # carry enters the top word
6296 roxr.w FTEMP_HI+2(%a0) # continue through the next word
6297 roxr.w FTEMP_LO(%a0)
6298 roxr.w FTEMP_LO+2(%a0)
6299 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
6300dcc_clr:
6301 tst.l %d0 # test for rs = 0
6302 bne.b dbl_done
6303 and.w &0xf000, FTEMP_LO+2(%a0) # tie: clear the l-bit (round to even)
6304
# also entered from truncate for double precision
6305dbl_done:
6306 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307 rts
6308
6309###########################
6310# Truncate all other bits #
6311###########################
# Drops the mantissa bits below the selected precision without rounding up.
# d1 is swapped here and is not swapped back before returning.
6312truncate:
6313 swap %d1 # select rnd prec
6314
6315 cmpi.b %d1, &s_mode # is prec sgl?
6316 beq.w sgl_done # yes; mask to sgl
6317 bgt.b dbl_done # no; it's dbl, mask to dbl
6318 rts # no; it's ext, nothing to mask
6319
6320
6321#
6322# ext_grs(): extract guard, round and sticky bits according to
6323# rounding precision.
6324#
6325# INPUT
6326# d0 = extended precision g,r,s (in d0{31:29})
6327# d1 = {PREC,ROUND}
6328# OUTPUT
6329# d0{31:29} = guard, round, sticky
6330#
6331# ext_grs() extracts the guard/round/sticky bits according to the
6332# selected rounding precision. It is called by the round subroutine
6333# only. All registers except d0 are kept intact. d0 becomes an
6334# updated guard,round,sticky in d0{31:29}
6335#
6336# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337# prior to usage, and needs to restore d1 to original. this
6338# routine is tightly tied to the round routine and not meant to
6339# uphold standard subroutine calling practices.
6340#
6341
6342ext_grs:
6343 swap %d1 # have d1.w point to round precision
6344 tst.b %d1 # is rnd prec = extended?
6345 bne.b ext_grs_not_ext # no; go handle sgl or dbl
6346
6347#
6348# %d0 actually already hold g,r,s since _round() had it before calling
6349# this function. so, as long as we don't disturb it, we are "returning" it.
6350#
6351ext_grs_ext:
6352 swap %d1 # yes; return to correct positions
6353 rts
6354
6355ext_grs_not_ext:
6356 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
6357
6358 cmpi.b %d1, &s_mode # is rnd prec = sgl?
6359 bne.b ext_grs_dbl # no; go handle dbl (sgl falls through)
6360
6361#
6362# sgl:
6363# 96 64 40 32 0
6364# -----------------------------------------------------
6365# | EXP |XXXXXXX| |xx | |grs|
6366# -----------------------------------------------------
6367# <--(24)--->nn\ /
6368# ee ---------------------
6369# ww |
6370# v
6371# gr new sticky
6372#
# Single precision: g,r are the two bits just below the 24-bit mantissa in
# FTEMP_HI; sticky is the OR of everything below them plus the incoming g,r,s.
6373ext_grs_sgl:
6374 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375 mov.l &30, %d2 # of the sgl prec. limits
6376 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
6377 mov.l FTEMP_HI(%a0), %d2 # get hi(mantissa) for s-bit test
6378 and.l &0x0000003f, %d2 # s bit is the or of all other
6379 bne.b ext_grs_st_stky # bits to the right of g-r
6380 tst.l FTEMP_LO(%a0) # test lower mantissa
6381 bne.b ext_grs_st_stky # if any are set, set sticky
6382 tst.l %d0 # test original g,r,s
6383 bne.b ext_grs_st_stky # if any are set, set sticky
6384 bra.b ext_grs_end_sd # nothing below g-r is set; exit
6385
6386#
6387# dbl:
6388# 96 64 32 11 0
6389# -----------------------------------------------------
6390# | EXP |XXXXXXX| | |xx |grs|
6391# -----------------------------------------------------
6392# nn\ /
6393# ee -------
6394# ww |
6395# v
6396# gr new sticky
6397#
# Double precision: g,r are bits at field offsets 21-22 of FTEMP_LO; sticky
# is the OR of the 9 bits below them plus the incoming g,r,s.
6398ext_grs_dbl:
6399 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400 mov.l &30, %d2 # of the dbl prec. limits
6401 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
6402 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
6403 and.l &0x000001ff, %d2 # s bit is the or-ing of all
6404 bne.b ext_grs_st_stky # other bits to the right of g-r
6405 tst.l %d0 # test original g,r,s
6406 bne.b ext_grs_st_stky # if any are set, set sticky
6407 bra.b ext_grs_end_sd # if clear, exit
6408
# shared tail for the sgl and dbl paths
6409ext_grs_st_stky:
6410 bset &rnd_stky_bit, %d3 # set sticky bit
6411ext_grs_end_sd:
6412 mov.l %d3, %d0 # return grs to d0
6413
6414 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
6415
6416 swap %d1 # restore d1 to original
6417 rts
6418
6419#########################################################################
6420# norm(): normalize the mantissa of an extended precision input. the #
6421# input operand should not be normalized already. #
6422# #
6423# XDEF **************************************************************** #
6424# norm() #
6425# #
6426# XREF **************************************************************** #
6427# none #
6428# #
6429# INPUT *************************************************************** #
6430# a0 = pointer fp extended precision operand to normalize #
6431# #
6432# OUTPUT ************************************************************** #
6433# d0 = number of bit positions the mantissa was shifted #
6434# a0 = the input operand's mantissa is normalized; the exponent #
6435# is unchanged. #
6436# #
6437#########################################################################
6438 global norm
norm:
# In : a0 -> operand whose 64-bit mantissa is not yet normalized
# Out: mantissa shifted left until its top bit is set, d0 = shift count;
#      d1 is clobbered, d2/d3 are preserved, the exponent is not touched.
	mov.l		%d2, -(%sp)		# save the two scratch registers
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_LO(%a0), %d1	# d1 = lo(mantissa)
	mov.l		FTEMP_HI(%a0), %d0	# d0 = hi(mantissa)

	bfffo		%d0{&0:&32}, %d2	# d2 = offset of first set bit in hi(man)
	beq.b		norm_lo			# hi(man) is all zeroes

norm_hi:
	bfextu		%d1{&0:%d2}, %d3	# d3 = top d2 bits of lo(man)
	lsl.l		%d2, %d0		# make room at the bottom of hi(man)
	lsl.l		%d2, %d1		# new lo(man)
	or.l		%d3, %d0		# new hi(man)

	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

norm_lo:
	clr.l		FTEMP_LO(%a0)		# everything moves up into hi(man)

	bfffo		%d1{&0:&32}, %d2	# d2 = offset of first set bit in lo(man)
	lsl.l		%d2, %d1		# shifted lo(man) ...
	mov.l		%d1, FTEMP_HI(%a0)	# ... becomes the new hi(man)

	add.l		&32, %d2		# account for the whole-longword move
	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
6480
6481#########################################################################
6482# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6483# - returns corresponding optype tag #
6484# #
6485# XDEF **************************************************************** #
6486# unnorm_fix() #
6487# #
6488# XREF **************************************************************** #
6489# norm() - normalize the mantissa #
6490# #
6491# INPUT *************************************************************** #
6492# a0 = pointer to unnormalized extended precision number #
6493# #
6494# OUTPUT ************************************************************** #
6495# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6496# a0 = input operand has been converted to a norm, denorm, or #
6497# zero; both the exponent and mantissa are changed. #
6498# #
6499#########################################################################
6500
6501 global unnorm_fix
6502unnorm_fix:
6503 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504 bne.b unnorm_shift # hi(man) is not all zeroes
6505
6506#
6507# hi(man) is all zeroes so see if any bits in lo(man) are set
6508#
6509unnorm_chk_lo:
6510 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511 beq.w unnorm_zero # yes
6512
6513 add.w &32, %d0 # no; fix shift distance
6514
6515#
6516# d0 = # shifts needed for complete normalization
6517#
6518unnorm_shift:
6519 clr.l %d1 # clear top word
6520 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6521 and.w &0x7fff, %d1 # strip off sgn
6522
6523 cmp.w %d0, %d1 # is shift amount > exponent?
6524 bgt.b unnorm_nrm_zero # yes; shift only until exp = 0
6525
6526#
6527# exponent would not go < 0. Therefore, number stays normalized
6528#
6529 sub.w %d0, %d1 # shift exponent value
6530 mov.w FTEMP_EX(%a0), %d0 # load old exponent
6531 and.w &0x8000, %d0 # save old sign
6532 or.w %d0, %d1 # {sgn,new exp}
6533 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
6534
6535 bsr.l norm # normalize UNNORM
6536
6537 mov.b &NORM, %d0 # return new optype tag
6538 rts
6539
6540#
6541# exponent would go < 0, so only shift the mantissa left by the old
6542# exponent value (d1) and force exp = 0; the result is a DENORM.
6543unnorm_nrm_zero:
6544 cmp.b %d1, &32 # is exp <= 32?
6545 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
6546
6547 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6548 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
6549
6550 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6551 lsl.l %d1, %d0 # extract new lo(man)
6552 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
6553
6554 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6555
6556 mov.b &DENORM, %d0 # return new optype tag
6557 rts
6558
6559#
6560# only mantissa bits set are in lo(man)
6561#
6562unnorm_nrm_zero_lrg:
6563 sub.w &32, %d1 # adjust shft amt by 32
6564
6565 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6566 lsl.l %d1, %d0 # left shift lo(man)
6567
6568 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6569 clr.l FTEMP_LO(%a0) # lo(man) = 0
6570
6571 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6572
6573 mov.b &DENORM, %d0 # return new optype tag
6574 rts
6575
6576#
6577# whole mantissa is zero so this UNNORM is actually a zero
6578#
6579unnorm_zero:
6580 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero; keep sign
6581
6582 mov.b &ZERO, %d0 # fix optype tag
6583 rts
6584
6584
6585#########################################################################
6586# XDEF **************************************************************** #
6587# set_tag_x(): return the optype of the input ext fp number #
6588# #
6589# XREF **************************************************************** #
6590# None #
6591# #
6592# INPUT *************************************************************** #
6593# a0 = pointer to extended precision operand #
6594# #
6595# OUTPUT ************************************************************** #
6596# d0 = value of type tag #
6597# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6598# #
6599# ALGORITHM *********************************************************** #
6600# Simply test the exponent, j-bit, and mantissa values to #
6601# determine the type of operand. #
6602# If it's an unnormalized zero, alter the operand and force it #
6603# to be a normal zero. #
6604# #
6605#########################################################################
6606
6607 global set_tag_x
6608set_tag_x:
6609 mov.w FTEMP_EX(%a0), %d0 # extract exponent
6610 andi.w &0x7fff, %d0 # strip off sign
6611 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
6612 beq.b inf_or_nan_x # yes; operand is INF or NAN
6613not_inf_or_nan_x:
6614 btst &0x7,FTEMP_HI(%a0) # is msb of mantissa (j-bit) set?
6615 beq.b not_norm_x # no
6616is_norm_x:
6617 mov.b &NORM, %d0 # yes; tag is NORM
6618 rts
6619not_norm_x:
6620 tst.w %d0 # is exponent = 0?
6621 bne.b is_unnorm_x # no; j-bit clear w/ exp != 0
6622not_unnorm_x:
6623 tst.l FTEMP_HI(%a0) # exp = 0; any bits in hi(man)?
6624 bne.b is_denorm_x # yes
6625 tst.l FTEMP_LO(%a0) # any bits in lo(man)?
6626 bne.b is_denorm_x # yes
6627is_zero_x:
6628 mov.b &ZERO, %d0 # exp and mantissa are all zero
6629 rts
6630is_denorm_x:
6631 mov.b &DENORM, %d0 # exp = 0 w/ non-zero mantissa
6632 rts
6633# must distinguish now "Unnormalized zeroes" which we
6634# must convert to zero.
6635is_unnorm_x:
6636 tst.l FTEMP_HI(%a0) # any bits in hi(man)?
6637 bne.b is_unnorm_reg_x # yes
6638 tst.l FTEMP_LO(%a0) # any bits in lo(man)?
6639 bne.b is_unnorm_reg_x # yes
6640# it's an "unnormalized zero". let's convert it to an actual zero...
6641 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
6642 mov.b &ZERO, %d0
6643 rts
6644is_unnorm_reg_x:
6645 mov.b &UNNORM, %d0 # exp != 0, j-bit clear, mantissa != 0
6646 rts
6647inf_or_nan_x:
6648 tst.l FTEMP_LO(%a0) # any bits in lo(man)?
6649 bne.b is_nan_x # yes; can't be INF
6650 mov.l FTEMP_HI(%a0), %d0
6651 and.l &0x7fffffff, %d0 # msb is a don't care!
6652 bne.b is_nan_x # remaining hi(man) bits set; NAN
6653is_inf_x:
6654 mov.b &INF, %d0 # max exp w/ zero fraction
6655 rts
6656is_nan_x:
6657 btst &0x6, FTEMP_HI(%a0) # is bit 6 of top mantissa byte set?
6658 beq.b is_snan_x # no; tag is SNAN
6659 mov.b &QNAN, %d0 # yes; tag is QNAN
6660 rts
6661is_snan_x:
6662 mov.b &SNAN, %d0
6663 rts
6664
6664
6665#########################################################################
6666# XDEF **************************************************************** #
6667# set_tag_d(): return the optype of the input dbl fp number #
6668# #
6669# XREF **************************************************************** #
6670# None #
6671# #
6672# INPUT *************************************************************** #
6673# a0 = points to double precision operand #
6674# #
6675# OUTPUT ************************************************************** #
6676# d0 = value of type tag #
6677# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6678# #
6679# ALGORITHM *********************************************************** #
6680# Simply test the exponent, j-bit, and mantissa values to #
6681# determine the type of operand. #
6682# #
6683#########################################################################
6684
6685 global set_tag_d
6686set_tag_d:
6687 mov.l FTEMP(%a0), %d0 # fetch first longword of operand
6688 mov.l %d0, %d1 # keep a copy for the fraction tests
6689
6690 andi.l &0x7ff00000, %d0 # isolate exponent field
6691 beq.b zero_or_denorm_d # exp = 0
6692
6693 cmpi.l %d0, &0x7ff00000 # is exponent all ones?
6694 beq.b inf_or_nan_d # yes
6695
6696is_norm_d:
6697 mov.b &NORM, %d0 # exp is neither zero nor all ones
6698 rts
6699zero_or_denorm_d:
6700 and.l &0x000fffff, %d1 # any fraction bits in first longword?
6701 bne is_denorm_d # yes
6702 tst.l 4+FTEMP(%a0) # any bits in second longword?
6703 bne is_denorm_d # yes
6704is_zero_d:
6705 mov.b &ZERO, %d0 # exp and fraction are all zero
6706 rts
6707is_denorm_d:
6708 mov.b &DENORM, %d0 # exp = 0 w/ non-zero fraction
6709 rts
6710inf_or_nan_d:
6711 and.l &0x000fffff, %d1 # any fraction bits in first longword?
6712 bne is_nan_d # yes
6713 tst.l 4+FTEMP(%a0) # any bits in second longword?
6714 bne is_nan_d # yes
6715is_inf_d:
6716 mov.b &INF, %d0 # max exp w/ zero fraction
6717 rts
6718is_nan_d:
6719 btst &19, %d1 # is top fraction bit (bit 19) set?
6720 bne is_qnan_d # yes; tag is QNAN
6721is_snan_d:
6722 mov.b &SNAN, %d0 # no; tag is SNAN
6723 rts
6724is_qnan_d:
6725 mov.b &QNAN, %d0
6726 rts
6727
6727
6728#########################################################################
6729# XDEF **************************************************************** #
6730# set_tag_s(): return the optype of the input sgl fp number #
6731# #
6732# XREF **************************************************************** #
6733# None #
6734# #
6735# INPUT *************************************************************** #
6736# a0 = pointer to single precision operand #
6737# #
6738# OUTPUT ************************************************************** #
6739# d0 = value of type tag #
6740# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6741# #
6742# ALGORITHM *********************************************************** #
6743# Simply test the exponent, j-bit, and mantissa values to #
6744# determine the type of operand. #
6745# #
6746#########################################################################
6747
6748 global set_tag_s
6749set_tag_s:
6750 mov.l FTEMP(%a0), %d0 # fetch the single precision operand
6751 mov.l %d0, %d1 # keep a copy for the fraction tests
6752
6753 andi.l &0x7f800000, %d0 # isolate exponent field
6754 beq.b zero_or_denorm_s # exp = 0
6755
6756 cmpi.l %d0, &0x7f800000 # is exponent all ones?
6757 beq.b inf_or_nan_s # yes
6758
6759is_norm_s:
6760 mov.b &NORM, %d0 # exp is neither zero nor all ones
6761 rts
6762zero_or_denorm_s:
6763 and.l &0x007fffff, %d1 # any fraction bits set?
6764 bne is_denorm_s # yes
6765is_zero_s:
6766 mov.b &ZERO, %d0 # exp and fraction are all zero
6767 rts
6768is_denorm_s:
6769 mov.b &DENORM, %d0 # exp = 0 w/ non-zero fraction
6770 rts
6771inf_or_nan_s:
6772 and.l &0x007fffff, %d1 # any fraction bits set?
6773 bne is_nan_s # yes
6774is_inf_s:
6775 mov.b &INF, %d0 # max exp w/ zero fraction
6776 rts
6777is_nan_s:
6778 btst &22, %d1 # is top fraction bit (bit 22) set?
6779 bne is_qnan_s # yes; tag is QNAN
6780is_snan_s:
6781 mov.b &SNAN, %d0 # no; tag is SNAN
6782 rts
6783is_qnan_s:
6784 mov.b &QNAN, %d0
6785 rts
6786
6786
6787#########################################################################
6788# XDEF **************************************************************** #
6789# unf_res(): routine to produce default underflow result of a #
6790# scaled extended precision number; this is used by #
6791# fadd/fdiv/fmul/etc. emulation routines. #
6792# unf_res4(): same as above but for fsglmul/fsgldiv which use #
6793# single round prec and extended prec mode. #
6794# #
6795# XREF **************************************************************** #
6796# _denorm() - denormalize according to scale factor #
6797# _round() - round denormalized number according to rnd prec #
6798# #
6799# INPUT *************************************************************** #
6800# a0 = pointer to extended precision operand #
6801# d0 = scale factor #
6802# d1 = rounding precision/mode #
6803# #
6804# OUTPUT ************************************************************** #
6805# a0 = pointer to default underflow result in extended precision #
6806# d0.b = result FPSR_cc which caller may or may not want to save #
6807# #
6808# ALGORITHM *********************************************************** #
6809# Convert the input operand to "internal format" which means the #
6810# exponent is extended to 16 bits and the sign is stored in the unused #
6811# portion of the extended precision operand. Denormalize the number #
6812# according to the scale factor passed in d0. Then, round the #
6813# denormalized result. #
6814# Set the FPSR_exc bits as appropriate but return the cc bits in #
6815# d0 in case the caller doesn't want to save them (as is the case for #
6816# fmove out). #
6817# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6818# precision and the rounding mode to single. #
6819# #
6820#########################################################################
6821 global unf_res
6822unf_res:
6823 mov.l %d1, -(%sp) # save rnd prec,mode on stack
6824
6825 btst &0x7, FTEMP_EX(%a0) # make "internal" format
6826 sne FTEMP_SGN(%a0) # temp sign byte = all ones if negative
6827
6828 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6829 and.w &0x7fff, %d1 # strip sign
6830 sub.w %d0, %d1 # subtract scale factor
6831 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
6832
6833 mov.l %a0, -(%sp) # save operand ptr during calls
6834
6835 mov.l 0x4(%sp),%d0 # pass rnd prec.
6836 andi.w &0x00c0,%d0 # keep only the rnd prec bits
6837 lsr.w &0x4,%d0
6838 bsr.l _denorm # denorm result
6839
6840 mov.l (%sp),%a0 # reload operand ptr
6841 mov.w 0x6(%sp),%d1 # load prec:mode into %d1
6842 andi.w &0xc0,%d1 # extract rnd prec
6843 lsr.w &0x4,%d1
6844 swap %d1 # hi(d1) = rnd prec
6845 mov.w 0x6(%sp),%d1 # load prec:mode again
6846 andi.w &0x30,%d1 # extract rnd mode
6847 lsr.w &0x4,%d1 # lo(d1) = rnd mode
6848 bsr.l _round # round the denorm
6849
6850 mov.l (%sp)+, %a0 # restore operand ptr
6851
6852# result is now rounded properly. convert back to normal format
6853 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
6854 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6855 beq.b unf_res_chkifzero # no; result is positive
6856 bset &0x7, FTEMP_EX(%a0) # set result sgn
6857 clr.b FTEMP_SGN(%a0) # clear temp sign
6858
6859# the number may have become zero after rounding. set ccodes accordingly.
6860unf_res_chkifzero:
6861 clr.l %d0 # start with no ccode bits set
6862 tst.l FTEMP_HI(%a0) # is value now a zero?
6863 bne.b unf_res_cont # no
6864 tst.l FTEMP_LO(%a0)
6865 bne.b unf_res_cont # no
6866# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
6867 bset &z_bit, %d0 # yes; set zero ccode bit
6868
6869unf_res_cont:
6870
6871#
6872# can inex1 also be set along with unfl and inex2???
6873#
6874# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6875#
6876 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877 beq.b unf_res_end # no
6878 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6879
6880unf_res_end:
6881 add.l &0x4, %sp # discard saved rnd prec,mode
6882 rts
6883
6883
6884# unf_res() for fsglmul() and fsgldiv().
6885 global unf_res4
6886unf_res4:
6887 mov.l %d1,-(%sp) # save rnd prec,mode on stack
6888
6889 btst &0x7,FTEMP_EX(%a0) # make "internal" format
6890 sne FTEMP_SGN(%a0) # temp sign byte = all ones if negative
6891
6892 mov.w FTEMP_EX(%a0),%d1 # extract exponent
6893 and.w &0x7fff,%d1 # strip sign
6894 sub.w %d0,%d1 # subtract scale factor
6895 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
6896
6897 mov.l %a0,-(%sp) # save operand ptr during calls
6898
6899 clr.l %d0 # force rnd prec = ext
6900 bsr.l _denorm # denorm result
6901
6902 mov.l (%sp),%a0 # reload operand ptr
6903 mov.w &s_mode,%d1 # force rnd prec = sgl
6904 swap %d1 # hi(d1) = rnd prec
6905 mov.w 0x6(%sp),%d1 # load rnd mode
6906 andi.w &0x30,%d1 # extract rnd mode
6907 lsr.w &0x4,%d1 # lo(d1) = rnd mode
6908 bsr.l _round # round the denorm
6909
6910 mov.l (%sp)+,%a0 # restore operand ptr
6911
6912# result is now rounded properly. convert back to normal format
6913 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
6914 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6915 beq.b unf_res4_chkifzero # no; result is positive
6916 bset &0x7,FTEMP_EX(%a0) # set result sgn
6917 clr.b FTEMP_SGN(%a0) # clear temp sign
6918
6919# the number may have become zero after rounding. set ccodes accordingly.
6920unf_res4_chkifzero:
6921 clr.l %d0 # start with no ccode bits set
6922 tst.l FTEMP_HI(%a0) # is value now a zero?
6923 bne.b unf_res4_cont # no
6924 tst.l FTEMP_LO(%a0)
6925 bne.b unf_res4_cont # no
6926# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
6927 bset &z_bit,%d0 # yes; set zero ccode bit
6928
6929unf_res4_cont:
6930
6931#
6932# can inex1 also be set along with unfl and inex2???
6933#
6934# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6935#
6936 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937 beq.b unf_res4_end # no
6938 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6939
6940unf_res4_end:
6941 add.l &0x4,%sp # discard saved rnd prec,mode
6942 rts
6943
6943
6944#########################################################################
6945# XDEF **************************************************************** #
6946# ovf_res(): routine to produce the default overflow result of #
6947# an overflowing number. #
6948# ovf_res2(): same as above but the rnd mode/prec are passed #
6949# differently. #
6950# #
6951# XREF **************************************************************** #
6952# none #
6953# #
6954# INPUT *************************************************************** #
6955# d1.b = '-1' => (-); '0' => (+) #
6956# ovf_res(): #
6957# d0 = rnd mode/prec #
6958# ovf_res2(): #
6959# hi(d0) = rnd prec #
6960# lo(d0) = rnd mode #
6961# #
6962# OUTPUT ************************************************************** #
6963# a0 = points to extended precision result #
6964# d0.b = condition code bits #
6965# #
6966# ALGORITHM *********************************************************** #
6967# The default overflow result can be determined by the sign of #
6968# the result and the rounding mode/prec in effect. These bits are #
6969# concatenated together to create an index into the default result #
6970# table. A pointer to the correct result is returned in a0. The #
6971# resulting condition codes are returned in d0 in case the caller #
6972# doesn't want FPSR_cc altered (as is the case for fmove out). #
6973# #
6974#########################################################################
6975
6976 global ovf_res
6977ovf_res:
6978 andi.w &0x10,%d1 # keep result sign
6979 lsr.b &0x4,%d0 # shift prec/mode
6980 or.b %d0,%d1 # concat the two
6981 mov.w %d1,%d0 # make a copy; d0 indexes tbl_ovfl_cc
6982 lsl.b &0x1,%d1 # multiply d1 by 2
6983 bra.b ovf_res_load
6984
6985 global ovf_res2
6986ovf_res2:
6987 and.w &0x10, %d1 # keep result sign
6988 or.b %d0, %d1 # insert rnd mode
6989 swap %d0 # bring rnd prec into lo(d0)
6990 or.b %d0, %d1 # insert rnd prec
6991 mov.w %d1, %d0 # make a copy; d0 indexes tbl_ovfl_cc
6992 lsl.b &0x1, %d1 # shift left by 1
6993
6994#
6995# use the rounding mode, precision, and result sign as an index into the
6996# two tables below to fetch the default result and the result ccodes.
6997# d0 = index (one byte per tbl_ovfl_cc entry); d1 = index * 2, scaled by 8
6998ovf_res_load:
6999 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7001
7002 rts
7003
# one ccode byte per tbl_ovfl_result entry, in the same order: 0x2 appears
# with the INF entries and 0x8 with the negative entries.
7004tbl_ovfl_cc:
7005 byte 0x2, 0x0, 0x0, 0x2
7006 byte 0x2, 0x0, 0x0, 0x2
7007 byte 0x2, 0x0, 0x0, 0x2
7008 byte 0x0, 0x0, 0x0, 0x0
7009 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7010 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7011 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7012
# each entry is four longwords (16 bytes); groups of four entries are ordered
# by rounding mode, and the all-zero group fills the unused index range.
7013tbl_ovfl_result:
7014 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7018
7019 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7023
7024 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028
7029 long 0x00000000,0x00000000,0x00000000,0x00000000
7030 long 0x00000000,0x00000000,0x00000000,0x00000000
7031 long 0x00000000,0x00000000,0x00000000,0x00000000
7032 long 0x00000000,0x00000000,0x00000000,0x00000000
7033
7034 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7038
7039 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7043
7044 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048
7048
7049#########################################################################
7050# XDEF **************************************************************** #
7051# fout(): move from fp register to memory or data register #
7052# #
7053# XREF **************************************************************** #
7054# _round() - needed to create EXOP for sgl/dbl precision #
7055# norm() - needed to create EXOP for extended precision #
7056# ovf_res() - create default overflow result for sgl/dbl precision#
7057# unf_res() - create default underflow result for sgl/dbl prec. #
7058# dst_dbl() - create rounded dbl precision result. #
7059# dst_sgl() - create rounded sgl precision result. #
7060# fetch_dreg() - fetch dynamic k-factor reg for packed. #
7061# bindec() - convert FP binary number to packed number. #
7062# _mem_write() - write data to memory. #
7063# _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064# _dmem_write_{byte,word,long}() - write data to memory. #
7065# store_dreg_{b,w,l}() - store data to data register file. #
7066# facc_out_{b,w,l,d,x}() - data access error occurred. #
7067# #
7068# INPUT *************************************************************** #
7069# a0 = pointer to extended precision source operand #
7070# d0 = round prec,mode #
7071# #
7072# OUTPUT ************************************************************** #
7073# fp0 : intermediate underflow or overflow result if #
7074# OVFL/UNFL occurred for a sgl or dbl operand #
7075# #
7076# ALGORITHM *********************************************************** #
7077# This routine is accessed by many handlers that need to do an #
7078# opclass three move of an operand out to memory. #
7079# Decode an fmove out (opclass 3) instruction to determine if #
7080# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7081# register or memory. The algorithm uses a standard "fmove" to create #
7082# the rounded result. Also, since exceptions are disabled, this also #
7083# creates the correct OPERR default result if appropriate. #
7084# For sgl or dbl precision, overflow or underflow can occur. If #
7085# either occurs and is enabled, an EXOP must be created. #
7086# For extended precision, the stacked <ea> must be fixed along #
7087# w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7088# the source is a denorm and if underflow is enabled, an EXOP must be #
7089# created. #
7090# For packed, the k-factor must be fetched from the instruction #
7091# word or a data register. The <ea> must be fixed as w/ extended #
7092# precision. Then, bindec() is called to create the appropriate #
7093# packed result. #
7094# If at any time an access error is flagged by one of the move- #
7095# to-memory routines, then a special exit must be made so that the #
7096# access error can be handled properly. #
7097# #
7098#########################################################################
7099
7100 global fout
7101fout:
7102 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7103 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # fetch routine offset for this fmt
7104 jmp (tbl_fout.b,%pc,%a1) # jump to routine
7105
7106 swbeg &0x8
# offsets of the per-format store routines, relative to tbl_fout and indexed
# by the 3-bit dst fmt field extracted above.
7107tbl_fout:
7108 short fout_long - tbl_fout # fmt 0
7109 short fout_sgl - tbl_fout # fmt 1
7110 short fout_ext - tbl_fout # fmt 2
7111 short fout_pack - tbl_fout # fmt 3
7112 short fout_word - tbl_fout # fmt 4
7113 short fout_dbl - tbl_fout # fmt 5
7114 short fout_byte - tbl_fout # fmt 6
7115 short fout_pack - tbl_fout # fmt 7
7116
7116
7117#################################################################
7118# fmove.b out ###################################################
7119#################################################################
7120
7121# Only "Unimplemented Data Type" exceptions enter here. The operand
7122# is either a DENORM or a NORM.
7123fout_byte:
7124 tst.b STAG(%a6) # is operand normalized?
7125 bne.b fout_byte_denorm # no
7126
7127 fmovm.x SRC(%a0),&0x80 # load value
7128
7129fout_byte_norm:
7130 fmov.l %d0,%fpcr # insert rnd prec,mode
7131
7132 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
7133
7134 fmov.l &0x0,%fpcr # clear FPCR
7135 fmov.l %fpsr,%d1 # fetch FPSR
7136 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7137
7138 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7139 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7140 beq.b fout_byte_dn # must save to integer regfile
7141
7142 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7143 bsr.l _dmem_write_byte # write byte
7144
7145 tst.l %d1 # did dstore fail?
7146 bne.l facc_out_b # yes
7147
7148 rts
7149
# destination is a data register: d1 = register number, d0 = converted value
7150fout_byte_dn:
7151 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7152 andi.w &0x7,%d1 # keep only the register number
7153 bsr.l store_dreg_b
7154 rts
7155
# the DENORM is replaced by a single precision value w/ the same sign and
# only exponent bit 23 set, which is then converted like a NORM.
7156fout_byte_denorm:
7157 mov.l SRC_EX(%a0),%d1
7158 andi.l &0x80000000,%d1 # keep DENORM sign
7159 ori.l &0x00800000,%d1 # make smallest sgl
7160 fmov.s %d1,%fp0 # load the substitute value
7161 bra.b fout_byte_norm
7162
7162
7163#################################################################
7164# fmove.w out ###################################################
7165#################################################################
7166
7167# Only "Unimplemented Data Type" exceptions enter here. The operand
7168# is either a DENORM or a NORM.
7169fout_word:
7170 tst.b STAG(%a6) # is operand normalized?
7171 bne.b fout_word_denorm # no
7172
7173 fmovm.x SRC(%a0),&0x80 # load value
7174
7175fout_word_norm:
7176 fmov.l %d0,%fpcr # insert rnd prec:mode
7177
7178 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
7179
7180 fmov.l &0x0,%fpcr # clear FPCR
7181 fmov.l %fpsr,%d1 # fetch FPSR
7182 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7183
7184 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7185 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7186 beq.b fout_word_dn # must save to integer regfile
7187
7188 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7189 bsr.l _dmem_write_word # write word
7190
7191 tst.l %d1 # did dstore fail?
7192 bne.l facc_out_w # yes
7193
7194 rts
7195
# destination is a data register: d1 = register number, d0 = converted value
7196fout_word_dn:
7197 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7198 andi.w &0x7,%d1 # keep only the register number
7199 bsr.l store_dreg_w
7200 rts
7201
# the DENORM is replaced by a single precision value w/ the same sign and
# only exponent bit 23 set, which is then converted like a NORM.
7202fout_word_denorm:
7203 mov.l SRC_EX(%a0),%d1
7204 andi.l &0x80000000,%d1 # keep DENORM sign
7205 ori.l &0x00800000,%d1 # make smallest sgl
7206 fmov.s %d1,%fp0 # load the substitute value
7207 bra.b fout_word_norm
7208
7208
7209#################################################################
7210# fmove.l out ###################################################
7211#################################################################
7212
7213# Only "Unimplemented Data Type" exceptions enter here. The operand
7214# is either a DENORM or a NORM.
7215fout_long:
7216 tst.b STAG(%a6) # is operand normalized?
7217 bne.b fout_long_denorm # no
7218
7219 fmovm.x SRC(%a0),&0x80 # load value
7220
7221fout_long_norm:
7222 fmov.l %d0,%fpcr # insert rnd prec:mode
7223
7224 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
7225
7226 fmov.l &0x0,%fpcr # clear FPCR
7227 fmov.l %fpsr,%d1 # fetch FPSR
7228 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7229
7230fout_long_write:
7231 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7232 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7233 beq.b fout_long_dn # must save to integer regfile
7234
7235 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7236 bsr.l _dmem_write_long # write long
7237
7238 tst.l %d1 # did dstore fail?
7239 bne.l facc_out_l # yes
7240
7241 rts
7242
# destination is a data register: d1 = register number, d0 = converted value
7243fout_long_dn:
7244 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7245 andi.w &0x7,%d1 # keep only the register number
7246 bsr.l store_dreg_l
7247 rts
7248
# the DENORM is replaced by a single precision value w/ the same sign and
# only exponent bit 23 set, which is then converted like a NORM.
7249fout_long_denorm:
7250 mov.l SRC_EX(%a0),%d1
7251 andi.l &0x80000000,%d1 # keep DENORM sign
7252 ori.l &0x00800000,%d1 # make smallest sgl
7253 fmov.s %d1,%fp0 # load the substitute value
7254 bra.b fout_long_norm
7255
7255
7256#################################################################
7257# fmove.x out ###################################################
7258#################################################################
7259
7260# Only "Unimplemented Data Type" exceptions enter here. The operand
7261# is either a DENORM or a NORM.
7262# The DENORM causes an Underflow exception.
7263fout_ext:
7264
7265# we copy the extended precision result to FP_SCR0 so that the reserved
7266# 16-bit field gets zeroed. we do this since we promise not to disturb
7267# what's at SRC(a0).
7268 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7269 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
7270 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7271 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7272
7273 fmovm.x SRC(%a0),&0x80 # return result
7274
7275 bsr.l _calc_ea_fout # fix stacked <ea>
7276
7277 mov.l %a0,%a1 # pass: dst addr
7278 lea FP_SCR0(%a6),%a0 # pass: src addr
7279 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7280
7281# we must not yet write the extended precision data to the stack
7282# in the pre-decrement case from supervisor mode or else we'll corrupt
7283# the stack frame. so, leave it in FP_SRC for now and deal with it later...
7284 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is special condition flag mda7_flg?
7285 beq.b fout_ext_a7 # yes; use _mem_write2 instead
7286
7287 bsr.l _dmem_write # write ext prec number to memory
7288
7289 tst.l %d1 # did dstore fail?
7290 bne.w fout_ext_err # yes
7291
7292 tst.b STAG(%a6) # is operand normalized?
7293 bne.b fout_ext_denorm # no
7294 rts
7295
7296# the number is a DENORM. must set the underflow exception bit
7297fout_ext_denorm:
7298 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7299
7300 mov.b FPCR_ENABLE(%a6),%d0
7301 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
7302 bne.b fout_ext_exc # yes
7303 rts
7304
7305# we don't want to do the write if the exception occurred in supervisor mode
7306# so _mem_write2() handles this for us.
7307fout_ext_a7:
7308 bsr.l _mem_write2 # write ext prec number to memory
7309
7310 tst.l %d1 # did dstore fail?
7311 bne.w fout_ext_err # yes
7312
7313 tst.b STAG(%a6) # is operand normalized?
7314 bne.b fout_ext_denorm # no
7315 rts
7316
# build the EXOP from the copy in FP_SCR0: normalize the mantissa and use the
# negated shift count (15 bits) as the exponent, keeping the original sign.
7317fout_ext_exc:
7318 lea FP_SCR0(%a6),%a0
7319 bsr.l norm # normalize the mantissa
7320 neg.w %d0 # new exp = -(shft amt)
7321 andi.w &0x7fff,%d0 # keep 15 exponent bits
7322 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7323 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7324 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7325 rts
7326
7327fout_ext_err:
7328 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
7329 bra.l facc_out_x # go handle the access error
7330
7330
7331#########################################################################
7332# fmove.s out ###########################################################
7333#########################################################################
7334fout_sgl:
7335 andi.b &0x30,%d0 # clear rnd prec
7336 ori.b &s_mode*0x10,%d0 # insert sgl prec
7337 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7338
7339#
7340# operand is a normalized number. first, we check to see if the move out
7341# would cause either an underflow or overflow. these cases are handled
7342# separately. otherwise, set the FPCR to the proper rounding mode and
7343# execute the move.
7344#
7345 mov.w SRC_EX(%a0),%d0 # extract exponent
7346 andi.w &0x7fff,%d0 # strip sign
7347
7348 cmpi.w %d0,&SGL_HI # will operand overflow?
7349 bgt.w fout_sgl_ovfl # yes; go handle OVFL
7350 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
7351 cmpi.w %d0,&SGL_LO # will operand underflow?
7352 blt.w fout_sgl_unfl # yes; go handle underflow
7353
7354#
7355# NORMs(in range) can be stored out by a simple "fmov.s"
7356# Unnormalized inputs can come through this point.
7357#
7358fout_sgl_exg:
7359 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7360
7361 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7362 fmov.l &0x0,%fpsr # clear FPSR
7363
7364 fmov.s %fp0,%d0 # store does convert and round
7365
7366 fmov.l &0x0,%fpcr # clear FPCR
7367 fmov.l %fpsr,%d1 # save FPSR
7368
7369 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
7370
# d0 = single precision result; store it to memory or to a data register
7371fout_sgl_exg_write:
7372 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7373 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7374 beq.b fout_sgl_exg_write_dn # must save to integer regfile
7375
7376 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7377 bsr.l _dmem_write_long # write long
7378
7379 tst.l %d1 # did dstore fail?
7380 bne.l facc_out_l # yes
7381
7382 rts
7383
7384fout_sgl_exg_write_dn:
7385 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7386 andi.w &0x7,%d1 # keep only the register number
7387 bsr.l store_dreg_l
7388 rts
7389
7389
7390#
7391# here, we know that the operand would UNFL if moved out to single prec,
7392# so, denorm and round and then use generic store single routine to
7393# write the value to memory.
7394#
7395fout_sgl_unfl:
7396 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7397
7398 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # work on a copy in FP_SCR0
7399 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7400 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7401 mov.l %a0,-(%sp) # save src ptr; popped on exit paths
7402
7403 clr.l %d0 # pass: S.F. = 0
7404
7405 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7406 bne.b fout_sgl_unfl_cont # skip normalization unless DENORM
7407
7408 lea FP_SCR0(%a6),%a0
7409 bsr.l norm # normalize the DENORM
7410
7411fout_sgl_unfl_cont:
7412 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7413 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7414 bsr.l unf_res # calc default underflow result
7415
7416 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7417 bsr.l dst_sgl # convert to single prec
7418
7419 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7420 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7421 beq.b fout_sgl_unfl_dn # must save to integer regfile
7422
7423 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7424 bsr.l _dmem_write_long # write long
7425
7426 tst.l %d1 # did dstore fail?
7427 bne.l facc_out_l # yes
7428
7429 bra.b fout_sgl_unfl_chkexc
7430
7431fout_sgl_unfl_dn:
7432 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7433 andi.w &0x7,%d1 # keep only the register number
7434 bsr.l store_dreg_l
7435
7436fout_sgl_unfl_chkexc:
7437 mov.b FPCR_ENABLE(%a6),%d1
7438 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7439 bne.w fout_sd_exc_unfl # yes
7440 addq.l &0x4,%sp # no; discard saved src ptr
7441 rts
7442
7442
7443#
7444# it's definitely an overflow so call ovf_res to get the correct answer
7445#
7446fout_sgl_ovfl:
7447 tst.b 3+SRC_HI(%a0) # is result inexact?
7448 bne.b fout_sgl_ovfl_inex2
7449 tst.l SRC_LO(%a0) # is result inexact?
7450 bne.b fout_sgl_ovfl_inex2
7451 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452 bra.b fout_sgl_ovfl_cont
7453fout_sgl_ovfl_inex2:
7454 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7455
7456fout_sgl_ovfl_cont:
7457 mov.l %a0,-(%sp) # save src ptr; popped on exit paths
7458
7459# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460# overflow result. DON'T save the returned ccodes from ovf_res() since
7461# fmove out doesn't alter them.
7462 tst.b SRC_EX(%a0) # is operand negative?
7463 smi %d1 # set if so
7464 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
7465 bsr.l ovf_res # calc OVFL result
7466 fmovm.x (%a0),&0x80 # load default overflow result
7467 fmov.s %fp0,%d0 # store to single
7468
7469 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7470 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7471 beq.b fout_sgl_ovfl_dn # must save to integer regfile
7472
7473 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7474 bsr.l _dmem_write_long # write long
7475
7476 tst.l %d1 # did dstore fail?
7477 bne.l facc_out_l # yes
7478
7479 bra.b fout_sgl_ovfl_chkexc
7480
7481fout_sgl_ovfl_dn:
7482 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7483 andi.w &0x7,%d1 # keep only the register number
7484 bsr.l store_dreg_l
7485
# this is the overflow path, so the enable test must look at the OVFL enable
# bit (0x10) and the INEX2 enable bit (0x02). the mask previously used here,
# 0x0a, was the UNFL|INEX2 mask of the underflow path and ignored an enabled
# OVFL exception.
7486fout_sgl_ovfl_chkexc:
7487 mov.b FPCR_ENABLE(%a6),%d1
7488 andi.b &0x12,%d1 # is OVFL or INEX enabled?
7489 bne.w fout_sd_exc_ovfl # yes
7490 addq.l &0x4,%sp # no; discard saved src ptr
7491 rts
7492
7492
7493#
7494# move out MAY overflow:
7495# (1) build a copy of the operand in FP_SCR0 with the biased exp forced to 0x3fff
7496# (2) do a move w/ appropriate rnd mode so the copy is rounded to single
7497# (3) if |rounded copy| < 2, the exponent did not increase, so the original
7498# operand is stored as an in-range NORM (fout_sgl_exg).
7499# otherwise rounding carried into the exponent, so handle as overflow.
7500#
# a0 is left pointing at the original source operand for both exits.
7501fout_sgl_may_ovfl:
7502 mov.w SRC_EX(%a0),%d1 # fetch current sign
7503 andi.w &0x8000,%d1 # keep it,clear exp
7504 ori.w &0x3fff,%d1 # insert exp = 0
7505 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7506 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7508
7509 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7510
7511 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7512 fmov.l &0x0,%fpcr # clear FPCR
7513
7514 fabs.x %fp0 # need absolute value
7515 fcmp.b %fp0,&0x2 # did exponent increase?
7516 fblt.w fout_sgl_exg # no; go finish NORM
7517 bra.w fout_sgl_ovfl # yes; go handle overflow
7518
7519################
7520
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for a single or double
# move out that underflowed or overflowed with the exception enabled.
# The source pointer pushed by the caller is popped from the stack, the
# operand is copied to FP_SCR0 (DENORMs are normalized first on the unfl
# path), rounded by _round using the precision and mode in L_SCR3, and
# returned in fp1.
7521fout_sd_exc_unfl:
7522 mov.l (%sp)+,%a0 # restore a0 (src ptr)
7523
7524 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7525 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7526 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7527
7528 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
7529 bne.b fout_sd_exc_cont # no
7530
7531 lea FP_SCR0(%a6),%a0
7532 bsr.l norm
7533 neg.l %d0 # negate the value returned by norm
7534 andi.w &0x7fff,%d0 # keep 15 bits
7535 bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert as the new 15-bit exponent
7536 bra.b fout_sd_exc_cont
7537
7538fout_sd_exc:
7539fout_sd_exc_ovfl:
7540 mov.l (%sp)+,%a0 # restore a0
7541
7542 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7543 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7544 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7545
7546fout_sd_exc_cont:
7547 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
7548 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
7549 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
7550
# build d1 for _round: hi word = rnd prec field, lo word = rnd mode field,
# both taken from the low byte of L_SCR3
7551 mov.b 3+L_SCR3(%a6),%d1
7552 lsr.b &0x4,%d1
7553 andi.w &0x0c,%d1 # keep the two precision bits
7554 swap %d1 # move them to the upper word
7555 mov.b 3+L_SCR3(%a6),%d1
7556 lsr.b &0x4,%d1
7557 andi.w &0x03,%d1 # keep the two rounding mode bits
7558 clr.l %d0 # pass: zero g,r,s
7559 bsr.l _round # round the DENORM
7560
7561 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
7562 beq.b fout_sd_exc_done # no
7563 bset &0x7,FP_SCR0_EX(%a6) # yes
7564
7565fout_sd_exc_done:
7566 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7567 rts
7568
7569#################################################################
7570# fmove.d out ###################################################
7571#################################################################
# fout_dbl: move out to memory in double precision.
#   in:  a0 = ptr to the extended precision source operand
#        d0 = rnd prec,mode (the precision field is replaced with dbl here)
# The operand's exponent is compared against DBL_HI and DBL_LO to pick the
# overflow, may-overflow, underflow or in-range path.
7572fout_dbl:
7573 andi.b &0x30,%d0 # clear rnd prec
7574 ori.b &d_mode*0x10,%d0 # insert dbl prec
7575 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7576
7577#
7578# operand is a normalized number. first, we check to see if the move out
7579# would cause either an underflow or overflow. these cases are handled
7580# separately. otherwise, set the FPCR to the proper rounding mode and
7581# execute the move.
7582#
7583 mov.w SRC_EX(%a0),%d0 # extract exponent
7584 andi.w &0x7fff,%d0 # strip sign
7585
7586 cmpi.w %d0,&DBL_HI # will operand overflow?
7587 bgt.w fout_dbl_ovfl # yes; go handle OVFL
7588 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
7589 cmpi.w %d0,&DBL_LO # will operand underflow?
7590 blt.w fout_dbl_unfl # yes; go handle underflow
7591
7592#
7593# NORMs(in range) can be stored out by a simple "fmov.d"
7594# Unnormalized inputs can come through this point.
7595#
7596fout_dbl_exg:
7597 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7598
7599 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7600 fmov.l &0x0,%fpsr # clear FPSR
7601
7602 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
7603
7604 fmov.l &0x0,%fpcr # clear FPCR
7605 fmov.l %fpsr,%d0 # save FPSR
7606
7607 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
7608
7609 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7610 lea L_SCR1(%a6),%a0 # pass: src addr
7611 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7612 bsr.l _dmem_write # store dbl fop to memory
7613
7614 tst.l %d1 # did dstore fail?
7615 bne.l facc_out_d # yes
7616
7617 rts # no; so we're finished
7618
7619#
7620# fout_dbl_unfl: here, we know that the operand would UNFL if moved out to
7621# double prec, so denorm and round it with unf_res(), convert it with
7622# dst_dbl(), and write the 8-byte value to memory.
7623#
# The source pointer (a0) is pushed on the stack on entry. It is dropped with
# addq.l when no EXOP is needed, or popped by fout_sd_exc_unfl otherwise.
7624fout_dbl_unfl:
7625 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7626
7627 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy the source operand to FP_SCR0
7628 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7629 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7630 mov.l %a0,-(%sp) # save src ptr for the EXOP path
7631
7632 clr.l %d0 # pass: S.F. = 0
7633
7634 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7635 bne.b fout_dbl_unfl_cont # let DENORMs fall through
7636
7637 lea FP_SCR0(%a6),%a0
7638 bsr.l norm # normalize the DENORM
7639
7640fout_dbl_unfl_cont:
7641 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7642 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7643 bsr.l unf_res # calc default underflow result
7644
7645 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7646 bsr.l dst_dbl # convert to double prec
7647 mov.l %d0,L_SCR1(%a6) # save hi(double result)
7648 mov.l %d1,L_SCR2(%a6) # save lo(double result)
7649
7650 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7651 lea L_SCR1(%a6),%a0 # pass: src addr
7652 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7653 bsr.l _dmem_write # store dbl fop to memory
7654
7655 tst.l %d1 # did dstore fail?
7656 bne.l facc_out_d # yes
7657
7658 mov.b FPCR_ENABLE(%a6),%d1
7659 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7660 bne.w fout_sd_exc_unfl # yes
7661 addq.l &0x4,%sp # no; discard the saved src ptr
7662 rts
7663
7664#
7665# it's definitely an overflow so call ovf_res to get the correct answer
7666#
# fout_dbl_ovfl:
#   in:  a0 = ptr to the extended precision source operand
#        L_SCR3(a6) = dbl rnd prec,mode
#   out: default overflow result written to memory (8 bytes);
#        fp1 = EXOP (via fout_sd_exc_ovfl) if OVFL or INEX2 is enabled.
# The source is inexact in double precision if any of the low 11 mantissa
# bits, which double does not keep, is set.
# The source pointer is pushed on entry and either dropped here or popped
# by fout_sd_exc_ovfl.
7667fout_dbl_ovfl:
7668 mov.w 2+SRC_LO(%a0),%d0 # fetch low word of lo(man)
7669 andi.w &0x7ff,%d0 # any of the low 11 bits set?
7670 bne.b fout_dbl_ovfl_inex2 # yes; result is inexact
7671
7672 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673 bra.b fout_dbl_ovfl_cont
7674fout_dbl_ovfl_inex2:
7675 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7676
7677fout_dbl_ovfl_cont:
7678 mov.l %a0,-(%sp) # save src ptr for the EXOP path
7679
7680# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681# overflow result. DON'T save the returned ccodes from ovf_res() since
7682# fmove out doesn't alter them.
7683 tst.b SRC_EX(%a0) # is operand negative?
7684 smi %d1 # set if so
7685 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
7686 bsr.l ovf_res # calc OVFL result
7687 fmovm.x (%a0),&0x80 # load default overflow result
7688 fmov.d %fp0,L_SCR1(%a6) # store to double
7689
7690 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7691 lea L_SCR1(%a6),%a0 # pass: src addr
7692 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7693 bsr.l _dmem_write # store dbl fop to memory
7694
7695 tst.l %d1 # did dstore fail?
7696 bne.l facc_out_d # yes
7697
# The EXOP must be built when OVFL (bit 4) or INEX2 (bit 1) is enabled.
# The mask used to be 0x0a (UNFL|INEX2), copied from the underflow path,
# so an enabled OVFL alone never produced an EXOP in fp1.
7698 mov.b FPCR_ENABLE(%a6),%d1
7699 andi.b &0x12,%d1 # is OVFL or INEX2 enabled?
7700 bne.w fout_sd_exc_ovfl # yes
7701 addq.l &0x4,%sp # no; discard the saved src ptr
7702 rts
7703
7704#
7705# move out MAY overflow:
7706# (1) build a copy of the operand in FP_SCR0 with the biased exp forced to 0x3fff
7707# (2) do a move w/ appropriate rnd mode so the copy is rounded to double
7708# (3) if |rounded copy| < 2, the exponent did not increase, so the original
7709# operand is stored as an in-range NORM (fout_dbl_exg).
7710# otherwise rounding carried into the exponent, so handle as overflow.
7711#
# a0 is left pointing at the original source operand for both exits.
7712fout_dbl_may_ovfl:
7713 mov.w SRC_EX(%a0),%d1 # fetch current sign
7714 andi.w &0x8000,%d1 # keep it,clear exp
7715 ori.w &0x3fff,%d1 # insert exp = 0
7716 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7717 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7719
7720 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7721
7722 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7723 fmov.l &0x0,%fpcr # clear FPCR
7724
7725 fabs.x %fp0 # need absolute value
7726 fcmp.b %fp0,&0x2 # did exponent increase?
7727 fblt.w fout_dbl_exg # no; go finish NORM
7728 bra.w fout_dbl_ovfl # yes; go handle overflow
7729
7730#########################################################################
7731# XDEF **************************************************************** #
7732# dst_dbl(): create double precision value from extended prec. #
7733# #
7734# XREF **************************************************************** #
7735# None #
7736# #
7737# INPUT *************************************************************** #
7738# a0 = pointer to source operand in extended precision #
7739# #
7740# OUTPUT ************************************************************** #
7741# d0 = hi(double precision result) #
7742# d1 = lo(double precision result) #
7743# #
7744# ALGORITHM *********************************************************** #
7745# #
7746# Changes extended precision to double precision. #
7747# Note: no attempt is made to round the extended value to double. #
7748# dbl_sign = ext_sign #
7749# dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
7750# get rid of ext integer bit #
7751# dbl_mant = ext_mant{62:11} #
7752# #
7753# --------------- --------------- --------------- #
7754# extended -> |s| exp | |1| ms mant | | ls mant | #
7755# --------------- --------------- --------------- #
7756# 95 64 63 62 32 31 11 0 #
7757# | | #
7758# | | #
7759# | | #
7760# v v #
7761# --------------- --------------- #
7762# double -> |s|exp| mant | | mant | #
7763# --------------- --------------- #
7764# 63 51 32 31 0 #
7765# #
7766#########################################################################
7767
# dst_dbl: a0 = ptr to extended operand; returns d0 = hi lword, d1 = lo lword
# of the double. L_SCR1 and L_SCR2 are used as scratch while d0/d1 are reused.
# No rounding is done: the low 11 bits of the extended mantissa are dropped.
7768dst_dbl:
7769 clr.l %d0 # clear d0
7770 mov.w FTEMP_EX(%a0),%d0 # get exponent
7771 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7772 addi.w &DBL_BIAS,%d0 # add double precision bias
7773 tst.b FTEMP_HI(%a0) # is number a denorm?
7774 bmi.b dst_get_dupper # no
7775 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
7776dst_get_dupper:
7777 swap %d0 # d0 now in upper word
7778 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
7779 tst.b FTEMP_EX(%a0) # test sign
7780 bpl.b dst_get_dman # if positive, go process mantissa
7781 bset &0x1f,%d0 # if negative, set sign
7782dst_get_dman:
7783 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7784 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
7785 or.l %d1,%d0 # put these bits in ms word of double
7786 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
7787 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7788 mov.l &21,%d0 # load shift count
7789 lsl.l %d0,%d1 # put lower 11 bits in upper bits
7790 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
7791 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
7792 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
7793 mov.l L_SCR2(%a6),%d1 # reload the partial lower lword
7794 or.l %d0,%d1 # put them in double result
7795 mov.l L_SCR1(%a6),%d0 # reload the upper lword
7796 rts
7797
7798#########################################################################
7799# XDEF **************************************************************** #
7800# dst_sgl(): create single precision value from extended prec #
7801# #
7802# XREF **************************************************************** #
7803# #
7804# INPUT *************************************************************** #
7805# a0 = pointer to source operand in extended precision #
7806# #
7807# OUTPUT ************************************************************** #
7808# d0 = single precision result #
7809# #
7810# ALGORITHM *********************************************************** #
7811# #
7812# Changes extended precision to single precision. #
7813# sgl_sign = ext_sign #
7814# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7815# get rid of ext integer bit #
7816# sgl_mant = ext_mant{62:40} #
7817# #
7818# --------------- --------------- --------------- #
7819# extended -> |s| exp | |1| ms mant | | ls mant | #
7820# --------------- --------------- --------------- #
7821# 95 64 63 62 40 32 31 12 0 #
7822# | | #
7823# | | #
7824# | | #
7825# v v #
7826# --------------- #
7827# single -> |s|exp| mant | #
7828# --------------- #
7829# 31 22 0 #
7830# #
7831#########################################################################
7832
# dst_sgl: a0 = ptr to extended operand; returns d0 = single precision value.
# d1 is used as scratch. No rounding is done: mantissa bits below the 23
# kept are dropped.
7833dst_sgl:
7834 clr.l %d0
7835 mov.w FTEMP_EX(%a0),%d0 # get exponent
7836 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7837 addi.w &SGL_BIAS,%d0 # add single precision bias
7838 tst.b FTEMP_HI(%a0) # is number a denorm?
7839 bmi.b dst_get_supper # no
7840 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
7841dst_get_supper:
7842 swap %d0 # put exp in upper word of d0
7843 lsl.l &0x7,%d0 # shift it into single exp bits
7844 tst.b FTEMP_EX(%a0) # test sign
7845 bpl.b dst_get_sman # if positive, continue
7846 bset &0x1f,%d0 # if negative, put in sign first
7847dst_get_sman:
7848 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7849 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
7850 lsr.l &0x8,%d1 # and put them flush right
7851 or.l %d1,%d0 # put these bits in ms word of single
7852 rts
7853
7854##############################################################################
# fout_pack: move out in packed decimal format (12 bytes).
# The destination <ea> from _calc_ea_fout is pushed on the stack and popped
# into a1 at fout_pack_write. NORMs and DENORMs are converted by bindec using
# a 7-bit signed k-factor taken either from the command word (static) or from
# a data register (dynamic). Other operand types are written out directly
# from FP_SRC; SNANs additionally set SNAN/AIOP and have bit 6 of FP_SRC_HI set.
7855fout_pack:
7856 bsr.l _calc_ea_fout # fetch the <ea>
7857 mov.l %a0,-(%sp) # save dst addr for fout_pack_write
7858
7859 mov.b STAG(%a6),%d0 # fetch input type
7860 bne.w fout_pack_not_norm # input is not NORM
7861
7862fout_pack_norm:
7863 btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
7864 beq.b fout_pack_s # static
7865
7866fout_pack_d:
7867 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
7868 lsr.b &0x4,%d1
7869 andi.w &0x7,%d1 # keep only the 3-bit register number
7870
7871 bsr.l fetch_dreg # fetch Dn w/ k-factor
7872
7873 bra.b fout_pack_type
7874fout_pack_s:
7875 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
7876
7877fout_pack_type:
7878 bfexts %d0{&25:&7},%d0 # extract k-factor
7879 mov.l %d0,-(%sp) # save k-factor across bindec
7880
7881 lea FP_SRC(%a6),%a0 # pass: ptr to input
7882
7883# bindec is currently scrambling FP_SRC for denorm inputs.
7884# we'll have to change this, but for now, tough luck!!!
7885 bsr.l bindec # convert xprec to packed
7886
7887# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889
7890 mov.l (%sp)+,%d0 # restore k-factor
7891
# is every digit that is tested below zero?
7892 tst.b 3+FP_SCR0_EX(%a6)
7893 bne.b fout_pack_set
7894 tst.l FP_SCR0_HI(%a6)
7895 bne.b fout_pack_set
7896 tst.l FP_SCR0_LO(%a6)
7897 bne.b fout_pack_set
7898
7899# add the extra condition that only if the k-factor was zero, too, should
7900# we zero the exponent
7901 tst.l %d0
7902 bne.b fout_pack_set
7903# "mantissa" is all zero which means that the answer is zero. but, the '040
7904# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905# if the mantissa is zero, I will zero the exponent, too.
7906# the question now is whether the exponents sign bit is allowed to be non-zero
7907# for a zero, also...
7908 andi.w &0xf000,FP_SCR0(%a6)
7909
7910fout_pack_set:
7911
7912 lea FP_SCR0(%a6),%a0 # pass: src addr
7913
7914fout_pack_write:
7915 mov.l (%sp)+,%a1 # pass: dst addr
7916 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7917
7918 cmpi.b SPCOND_FLG(%a6),&mda7_flg
7919 beq.b fout_pack_a7
7920
7921 bsr.l _dmem_write # write packed number to memory
7922
7923 tst.l %d1 # did dstore fail?
7924 bne.w fout_ext_err # yes
7925
7926 rts
7927
7928# we don't want to do the write if the exception occurred in supervisor mode
7929# so _mem_write2() handles this for us.
7930fout_pack_a7:
7931 bsr.l _mem_write2 # write packed number to memory
7932
7933 tst.l %d1 # did dstore fail?
7934 bne.w fout_ext_err # yes
7935
7936 rts
7937
7938fout_pack_not_norm:
7939 cmpi.b %d0,&DENORM # is it a DENORM?
7940 beq.w fout_pack_norm # yes
7941 lea FP_SRC(%a6),%a0 # pass: src addr is the operand itself
7942 clr.w 2+FP_SRC_EX(%a6) # clear the word following the exponent
7943 cmpi.b %d0,&SNAN # is it an SNAN?
7944 beq.b fout_pack_snan # yes
7945 bra.b fout_pack_write # no
7946
7947fout_pack_snan:
7948 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949 bset &0x6,FP_SRC_HI(%a6) # set snan bit
7950 bra.b fout_pack_write
7951
7952#########################################################################
7953# XDEF **************************************************************** #
7954# fmul(): emulates the fmul instruction #
7955# fsmul(): emulates the fsmul instruction #
7956# fdmul(): emulates the fdmul instruction #
7957# #
7958# XREF **************************************************************** #
7959# scale_to_zero_src() - scale src exponent to zero #
7960# scale_to_zero_dst() - scale dst exponent to zero #
7961# unf_res() - return default underflow result #
7962# ovf_res() - return default overflow result #
7963# res_qnan() - return QNAN result #
7964# res_snan() - return SNAN result #
7965# #
7966# INPUT *************************************************************** #
7967# a0 = pointer to extended precision source operand #
7968# a1 = pointer to extended precision destination operand #
7969# d0 = rnd prec,mode #
7970# #
7971# OUTPUT ************************************************************** #
7972# fp0 = result #
7973# fp1 = EXOP (if exception occurred) #
7974# #
7975# ALGORITHM *********************************************************** #
7976# Handle NANs, infinities, and zeroes as special cases. Divide #
7977# norms/denorms into ext/sgl/dbl precision. #
7978# For norms/denorms, scale the exponents such that a multiply #
7979# instruction won't cause an exception. Use the regular fmul to #
7980# compute a result. Check if the regular operands would have taken #
7981# an exception. If so, return the default overflow/underflow result #
7982# and return the EXOP if exceptions are enabled. Else, scale the #
7983# result operand to the proper exponent. #
7984# #
7985#########################################################################
7986
# Scale factor thresholds used by fmul_norm, indexed by rounding precision
# (ext, sgl, dbl). The combined scale factor is compared against the entry
# for the current precision to pick the overflow, may-overflow, underflow,
# may-underflow or normal path.
7987 align 0x10
7988tbl_fmul_ovfl:
7989 long 0x3fff - 0x7ffe # ext_max
7990 long 0x3fff - 0x407e # sgl_max
7991 long 0x3fff - 0x43fe # dbl_max
7992tbl_fmul_unfl:
7993 long 0x3fff + 0x0001 # ext_unfl
7994 long 0x3fff - 0x3f80 # sgl_unfl
7995 long 0x3fff - 0x3c00 # dbl_unfl
7996
# fsmul / fdmul / fmul entry points.
# fsmul and fdmul replace the precision field of d0 with sgl or dbl and then
# share the fmul body. fmul saves d0 in L_SCR3, combines DTAG and STAG into a
# table index, and dispatches through fmul_not_norm unless both are NORM.
7997 global fsmul
7998fsmul:
7999 andi.b &0x30,%d0 # clear rnd prec
8000 ori.b &s_mode*0x10,%d0 # insert sgl prec
8001 bra.b fmul
8002
8003 global fdmul
8004fdmul:
8005 andi.b &0x30,%d0 # clear rnd prec
8006 ori.b &d_mode*0x10,%d0 # insert dbl prec
8007
8008 global fmul
8009fmul:
8010 mov.l %d0,L_SCR3(%a6) # store rnd info
8011
8012 clr.w %d1
8013 mov.b DTAG(%a6),%d1
8014 lsl.b &0x3,%d1 # d1 = DTAG * 8
8015 or.b STAG(%a6),%d1 # combine src tags
8016 bne.w fmul_not_norm # optimize on non-norm input
8017
# copy both operands to scratch and scale their exponents; the sum of the two
# scale factors is then tested against the overflow/underflow thresholds.
8018fmul_norm:
8019 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8020 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8021 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8022
8023 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8024 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8025 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8026
8027 bsr.l scale_to_zero_src # scale src exponent
8028 mov.l %d0,-(%sp) # save scale factor 1
8029
8030 bsr.l scale_to_zero_dst # scale dst exponent
8031
8032 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8033
8034 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8035 lsr.b &0x6,%d1 # shift to lo bits
8036 mov.l (%sp)+,%d0 # load S.F.
8037 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038 beq.w fmul_may_ovfl # result may rnd to overflow
8039 blt.w fmul_ovfl # result will overflow
8040
8041 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042 beq.w fmul_may_unfl # result may rnd to no unfl
8043 bgt.w fmul_unfl # result will underflow
8044
8045#
8046# NORMAL:
8047# - the result of the multiply operation will neither overflow nor underflow.
8048# - do the multiply to the proper precision and rounding mode.
8049# - scale the result exponent using the scale factor. if both operands were
8050# normalized then we really don't need to go through this scaling. but for now,
8051# this will do.
8052#
# d0 holds the scale factor on entry to fmul_normal and fmul_normal_exit.
# d2 is saved and restored around its use as scratch.
8053fmul_normal:
8054 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8055
8056 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8057 fmov.l &0x0,%fpsr # clear FPSR
8058
8059 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8060
8061 fmov.l %fpsr,%d1 # save status
8062 fmov.l &0x0,%fpcr # clear FPCR
8063
8064 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8065
8066fmul_normal_exit:
8067 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8068 mov.l %d2,-(%sp) # save d2
8069 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8070 mov.l %d1,%d2 # make a copy
8071 andi.l &0x7fff,%d1 # strip sign
8072 andi.w &0x8000,%d2 # keep old sign
8073 sub.l %d0,%d1 # subtract scale factor from exponent
8074 or.w %d2,%d1 # concat old sign,new exp
8075 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8076 mov.l (%sp)+,%d2 # restore d2
8077 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8078 rts
8079
8080#
8081# OVERFLOW:
8082# - the result of the multiply operation is an overflow.
8083# - do the multiply to the proper precision and rounding mode in order to
8084# set the inexact bits.
8085# - calculate the default result and return it in fp0.
8086# - if overflow or inexact is enabled, we need a multiply result rounded to
8087# extended precision. if the original operation was extended, then we have this
8088# result. if the original operation was single or double, we have to do another
8089# multiply using extended precision and the correct rounding mode. the result
8090# of this operation then has its exponent scaled by -0x6000 to create the
8091# exceptional operand.
8092#
# d0 holds the scale factor on entry and is still needed by fmul_ovfl_ena_cont.
8093fmul_ovfl:
8094 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8095
8096 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8097 fmov.l &0x0,%fpsr # clear FPSR
8098
8099 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8100
8101 fmov.l %fpsr,%d1 # save status
8102 fmov.l &0x0,%fpcr # clear FPCR
8103
8104 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8105
8106# save setting this until now because this is where fmul_may_ovfl may jump in
8107fmul_ovfl_tst:
8108 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109
8110 mov.b FPCR_ENABLE(%a6),%d1
8111 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8112 bne.b fmul_ovfl_ena # yes
8113
8114# calculate the default result
8115fmul_ovfl_dis:
8116 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8117 sne %d1 # set sign param accordingly
8118 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
8119 bsr.l ovf_res # calculate default result
8120 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8121 fmovm.x (%a0),&0x80 # return default result in fp0
8122 rts
8123
8124#
8125# OVFL is enabled; Create EXOP:
8126# - if precision is extended, then we have the EXOP. simply bias the exponent
8127# with an extra -0x6000. if the precision is single or double, we need to
8128# calculate a result rounded to extended precision.
8129#
8130fmul_ovfl_ena:
8131 mov.l L_SCR3(%a6),%d1
8132 andi.b &0xc0,%d1 # test the rnd prec
8133 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
8134
8135fmul_ovfl_ena_cont:
8136 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8137
8138 mov.l %d2,-(%sp) # save d2
8139 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8140 mov.w %d1,%d2 # make a copy
8141 andi.l &0x7fff,%d1 # strip sign
8142 sub.l %d0,%d1 # subtract scale factor from exponent
8143 subi.l &0x6000,%d1 # subtract bias
8144 andi.w &0x7fff,%d1 # clear sign bit
8145 andi.w &0x8000,%d2 # keep old sign
8146 or.w %d2,%d1 # concat old sign,new exp
8147 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8148 mov.l (%sp)+,%d2 # restore d2
8149 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8150 bra.b fmul_ovfl_dis # then build the default result in fp0
8151
# sgl or dbl: redo the multiply in extended precision with the user's rnd mode
8152fmul_ovfl_ena_sd:
8153 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8154
8155 mov.l L_SCR3(%a6),%d1
8156 andi.b &0x30,%d1 # keep rnd mode only
8157 fmov.l %d1,%fpcr # set FPCR
8158
8159 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8160
8161 fmov.l &0x0,%fpcr # clear FPCR
8162 bra.b fmul_ovfl_ena_cont
8163
8164#
8165# may OVERFLOW:
8166# - the result of the multiply operation MAY overflow.
8167# - do the multiply to the proper precision and rounding mode in order to
8168# set the inexact bits.
8169# - if |result| >= 2 the scaled result overflowed: continue at fmul_ovfl_tst.
# - otherwise the result is correct: rescale it at fmul_normal_exit.
8170#
8171fmul_may_ovfl:
8172 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8173
8174 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8175 fmov.l &0x0,%fpsr # clear FPSR
8176
8177 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8178
8179 fmov.l %fpsr,%d1 # save status
8180 fmov.l &0x0,%fpcr # clear FPCR
8181
8182 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8183
8184 fabs.x %fp0,%fp1 # make a copy of result
8185 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8186 fbge.w fmul_ovfl_tst # yes; overflow has occurred
8187
8188# no, it didn't overflow; we have correct result
8189 bra.w fmul_normal_exit
8190
8191#
8192# UNDERFLOW:
8193# - the result of the multiply operation is an underflow.
8194# - do the multiply in extended precision, round to zero, in order to
8195# set the inexact bits.
8196# - calculate the default result with unf_res() and return it in fp0.
8197# - if underflow or inexact is enabled, we need a multiply result rounded to
8198# extended precision. if the original operation was extended, the multiply is
8199# redone with the user's rounding precision and mode. if it was single or
8200# double, it is redone in extended precision with the user's rounding mode. the
8201# result of this operation then has its exponent scaled by +0x6000 to create
8202# the exceptional operand.
8203#
# d0 holds the scale factor on entry and is still needed by fmul_unfl_ena_cont.
8204fmul_unfl:
8205 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206
8207# for fun, let's use only extended precision, round to zero. then, let
8208# the unf_res() routine figure out all the rest.
8209# will we get the correct answer?
8210 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8211
8212 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8213 fmov.l &0x0,%fpsr # clear FPSR
8214
8215 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8216
8217 fmov.l %fpsr,%d1 # save status
8218 fmov.l &0x0,%fpcr # clear FPCR
8219
8220 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8221
8222 mov.b FPCR_ENABLE(%a6),%d1
8223 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8224 bne.b fmul_unfl_ena # yes
8225
8226fmul_unfl_dis:
8227 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8228
8229 lea FP_SCR0(%a6),%a0 # pass: result addr
8230 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8231 bsr.l unf_res # calculate default result
8232 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
8233 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8234 rts
8235
8236#
8237# UNFL is enabled.
8238#
8239fmul_unfl_ena:
8240 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
8241
8242 mov.l L_SCR3(%a6),%d1
8243 andi.b &0xc0,%d1 # is precision extended?
8244 bne.b fmul_unfl_ena_sd # no, sgl or dbl
8245
8246# if the rnd mode is anything but RZ, then we have to re-do the above
8247# multiplication because we used RZ for all.
8248 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8249
8250fmul_unfl_ena_cont:
8251 fmov.l &0x0,%fpsr # clear FPSR
8252
8253 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8254
8255 fmov.l &0x0,%fpcr # clear FPCR
8256
8257 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
8258 mov.l %d2,-(%sp) # save d2
8259 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8260 mov.l %d1,%d2 # make a copy
8261 andi.l &0x7fff,%d1 # strip sign
8262 andi.w &0x8000,%d2 # keep old sign
8263 sub.l %d0,%d1 # subtract scale factor from exponent
8264 addi.l &0x6000,%d1 # add bias
8265 andi.w &0x7fff,%d1 # clear sign bit
8266 or.w %d2,%d1 # concat old sign,new exp
8267 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8268 mov.l (%sp)+,%d2 # restore d2
8269 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8270 bra.w fmul_unfl_dis # then build the default result from fp0
8271
8272fmul_unfl_ena_sd:
8273 mov.l L_SCR3(%a6),%d1
8274 andi.b &0x30,%d1 # use only rnd mode
8275 fmov.l %d1,%fpcr # set FPCR
8276
8277 bra.b fmul_unfl_ena_cont
8278
8279# MAY UNDERFLOW:
8280# -use the correct rounding mode and precision. this code favors operations
8281# that do not underflow.
# -the rounded |result| is compared with 2: greater means no underflow, less
# means underflow, and equal is resolved by redoing the multiply with RZ.
8282fmul_may_unfl:
8283 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8284
8285 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8286 fmov.l &0x0,%fpsr # clear FPSR
8287
8288 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8289
8290 fmov.l %fpsr,%d1 # save status
8291 fmov.l &0x0,%fpcr # clear FPCR
8292
8293 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8294
8295 fabs.x %fp0,%fp1 # make a copy of result
8296 fcmp.b %fp1,&0x2 # compare |result| with 2.b
8297 fbgt.w fmul_normal_exit # greater; no underflow occurred
8298 fblt.w fmul_unfl # less; underflow occurred
8299
8300#
8301# we still don't know if underflow occurred. result is ~ equal to 2. but,
8302# we don't know if the result was an underflow that rounded up to a 2 or
8303# a normalized number that rounded down to a 2. so, redo the entire operation
8304# using RZ as the rounding mode to see what the pre-rounded result is.
8305# this case should be relatively rare.
8306#
8307 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
8308
8309 mov.l L_SCR3(%a6),%d1
8310 andi.b &0xc0,%d1 # keep rnd prec
8311 ori.b &rz_mode*0x10,%d1 # insert RZ
8312
8313 fmov.l %d1,%fpcr # set FPCR
8314 fmov.l &0x0,%fpsr # clear FPSR
8315
8316 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8317
8318 fmov.l &0x0,%fpcr # clear FPCR
8319 fabs.x %fp1 # make absolute value
8320 fcmp.b %fp1,&0x2 # is |result| < 2.b?
8321 fbge.w fmul_normal_exit # no; no underflow occurred
8322 bra.w fmul_unfl # yes, underflow occurred
8323
8324################################################################################
8325
8326#
8327# Multiply: inputs are not both normalized; what are they?
8328#
# fmul_not_norm: dispatch on the combined operand tags.
# d1 = (DTAG << 3) | STAG, as built in fmul, indexes a table of 16-bit
# offsets relative to tbl_fmul_op. Each group of 8 entries is one destination
# type; entries are labelled "DST x SRC". The last two entries of each group
# hold a zero offset (tbl_fmul_op - tbl_fmul_op).
8329fmul_not_norm:
8330 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
8331 jmp (tbl_fmul_op.b,%pc,%d1.w)
8332
8333 swbeg &48
8334tbl_fmul_op:
8335 short fmul_norm - tbl_fmul_op # NORM x NORM
8336 short fmul_zero - tbl_fmul_op # NORM x ZERO
8337 short fmul_inf_src - tbl_fmul_op # NORM x INF
8338 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
8339 short fmul_norm - tbl_fmul_op # NORM x DENORM
8340 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
8341 short tbl_fmul_op - tbl_fmul_op #
8342 short tbl_fmul_op - tbl_fmul_op #
8343
8344 short fmul_zero - tbl_fmul_op # ZERO x NORM
8345 short fmul_zero - tbl_fmul_op # ZERO x ZERO
8346 short fmul_res_operr - tbl_fmul_op # ZERO x INF
8347 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
8348 short fmul_zero - tbl_fmul_op # ZERO x DENORM
8349 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
8350 short tbl_fmul_op - tbl_fmul_op #
8351 short tbl_fmul_op - tbl_fmul_op #
8352
8353 short fmul_inf_dst - tbl_fmul_op # INF x NORM
8354 short fmul_res_operr - tbl_fmul_op # INF x ZERO
8355 short fmul_inf_dst - tbl_fmul_op # INF x INF
8356 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
8357 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
8358 short fmul_res_snan - tbl_fmul_op # INF x SNAN
8359 short tbl_fmul_op - tbl_fmul_op #
8360 short tbl_fmul_op - tbl_fmul_op #
8361
8362 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
8363 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
8364 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
8365 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
8366 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
8367 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
8368 short tbl_fmul_op - tbl_fmul_op #
8369 short tbl_fmul_op - tbl_fmul_op #
8370
8371 short fmul_norm - tbl_fmul_op # DENORM x NORM
8372 short fmul_zero - tbl_fmul_op # DENORM x ZERO
8373 short fmul_inf_src - tbl_fmul_op # DENORM x INF
8374 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
8375 short fmul_norm - tbl_fmul_op # DENORM x DENORM
8376 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
8377 short tbl_fmul_op - tbl_fmul_op #
8378 short tbl_fmul_op - tbl_fmul_op #
8379
8380 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
8381 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
8382 short fmul_res_snan - tbl_fmul_op # SNAN x INF
8383 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
8384 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
8385 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
8386 short tbl_fmul_op - tbl_fmul_op #
8387 short tbl_fmul_op - tbl_fmul_op #
8388
# Stubs referenced from tbl_fmul_op; each one only branches on to the
# handler of the same name without the "fmul_" prefix.
8389fmul_res_operr:
8390 bra.l res_operr # used for ZERO x INF and INF x ZERO
8391fmul_res_snan:
8392 bra.l res_snan # used whenever either operand is an SNAN
8393fmul_res_qnan:
8394 bra.l res_qnan # used for QNAN operands when no SNAN is present
8395
8396#
8397# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398#
# Returns a zero whose sign is the exclusive-or of the operand signs.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#   out: fp0 = +0 or -0; FPSR_CC(%a6) = Z, plus N for -0
#   uses d0, d1 as scratch
#
8399 global fmul_zero # global for fsglmul
8400fmul_zero:
8401 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8402 mov.b DST_EX(%a1),%d1 # (sign is bit 7 of the first {sgn,exp} byte)
8403 eor.b %d0,%d1 # bit 7 of d1 = result sign
8404 bpl.b fmul_zero_p # result ZERO is pos.
8405fmul_zero_n:
8406 fmov.s &0x80000000,%fp0 # load -ZERO
8407 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408 rts
8409fmul_zero_p:
8410 fmov.s &0x00000000,%fp0 # load +ZERO
8411 mov.b &z_bmask,FPSR_CC(%a6) # set Z
8412 rts
8413
8414#
8415# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416#
8417# Note: The j-bit for an infinity is a don't-care. However, to be
8418# strictly compatible w/ the 68881/882, we make sure to return an
8419# INF w/ the j-bit set if the input INF j-bit was set. Destination
8420# INFs take priority.
8421#
# fmul_inf_dst returns the destination INF with the sign replaced by the
# exclusive-or of the operand signs.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#   out: fp0 = signed INF; FPSR_CC(%a6) = INF, plus N when negative
# fmul_inf_dst_n / fmul_inf_dst_p are also entered from fmul_inf_src with
# the INF already loaded into fp0.
#
8422 global fmul_inf_dst # global for fsglmul
8423fmul_inf_dst:
8424 fmovm.x DST(%a1),&0x80 # return INF result in fp0
8425 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8426 mov.b DST_EX(%a1),%d1
8427 eor.b %d0,%d1 # bit 7 of d1 = result sign
8428 bpl.b fmul_inf_dst_p # result INF is pos.
8429fmul_inf_dst_n:
8430 fabs.x %fp0 # clear result sign
8431 fneg.x %fp0 # set result sign
8432 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433 rts
8434fmul_inf_dst_p:
8435 fabs.x %fp0 # clear result sign
8436 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
8437 rts
8438
# Multiply with an INF source and a non-INF destination (NORM/DENORM x INF
# in tbl_fmul_op): load the source INF into fp0, then share the sign and
# condition-code tails of fmul_inf_dst.
8439 global fmul_inf_src # global for fsglmul
8440fmul_inf_src:
8441 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
8442 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8443 mov.b DST_EX(%a1),%d1
8444 eor.b %d0,%d1 # bit 7 of d1 = result sign
8445 bpl.b fmul_inf_dst_p # result INF is pos.
8446 bra.b fmul_inf_dst_n # result INF is neg.
8447
8448#########################################################################
8449# XDEF **************************************************************** #
8450# fin(): emulates the fmove instruction #
8451# fsin(): emulates the fsmove instruction #
8452# fdin(): emulates the fdmove instruction #
8453# #
8454# XREF **************************************************************** #
8455# norm() - normalize mantissa for EXOP on denorm #
8456# scale_to_zero_src() - scale src exponent to zero #
8457# ovf_res() - return default overflow result #
8458# unf_res() - return default underflow result #
8459# res_qnan_1op() - return QNAN result #
8460# res_snan_1op() - return SNAN result #
8461# #
8462# INPUT *************************************************************** #
8463# a0 = pointer to extended precision source operand #
8464# d0 = round prec/mode #
8465# #
8466# OUTPUT ************************************************************** #
8467# fp0 = result #
8468# fp1 = EXOP (if exception occurred) #
8469# #
8470# ALGORITHM *********************************************************** #
8471# Handle NANs, infinities, and zeroes as special cases. Divide #
8472# norms into extended, single, and double precision. #
8473# Norms can be emulated w/ a regular fmove instruction. For #
8474# sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8475# if the result would have overflowed/underflowed. If so, use unf_res() #
8476# or ovf_res() to return the default result. Also return EXOP if #
8477# exception is enabled. If no exception, return the default result. #
8478# Unnorms don't pass through here. #
8479# #
8480#########################################################################
8481
# fsin: entry point for fsmove. Forces single precision in the rounding
# info held in d0, then continues in fin.
8482 global fsin
8483fsin:
8484 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
8485 ori.b &s_mode*0x10,%d0 # insert sgl precision
8486 bra.b fin
8487
# fdin: entry point for fdmove. Forces double precision in d0 and falls
# through into fin.
8488 global fdin
8489fdin:
8490 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
8491 ori.b &d_mode*0x10,%d0 # insert dbl precision
8492
# fin: common entry. Saves the rounding info in L_SCR3 and dispatches on
# the source operand tag; a zero tag falls through into fin_norm.
8493 global fin
8494fin:
8495 mov.l %d0,L_SCR3(%a6) # store rnd info
8496
8497 mov.b STAG(%a6),%d1 # fetch src optype tag
8498 bne.w fin_not_norm # optimize on non-norm input
8499
8500#
8501# FP MOVE IN: NORMs and DENORMs ONLY!
8502#
# fin_norm is reached by fall-through from fin when the source tag is
# zero; DENORM sources enter at fin_denorm and share fin_not_ext.
8503fin_norm:
8504 andi.b &0xc0,%d0 # is precision extended? (rnd prec bits all zero)
8505 bne.w fin_not_ext # no, so go handle dbl or sgl
8506
8507#
8508# precision selected is extended. so...we cannot get an underflow
8509# or overflow because of rounding to the correct precision. so...
8510# skip the scaling and unscaling...
8511#
8512 tst.b SRC_EX(%a0) # is the operand negative?
8513 bpl.b fin_norm_done # no
8514 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8515fin_norm_done:
8516 fmovm.x SRC(%a0),&0x80 # return result in fp0
8517 rts
8518
8519#
8520# for an extended precision DENORM, the UNFL exception bit is set
8521# the accrued bit is NOT set in this instance(no inexactness!)
8522#
# The DENORM itself is returned unchanged in fp0; an EXOP is produced in
# fin_denorm_unfl_ena only when UNFL is enabled in the FPCR.
8523fin_denorm:
8524 andi.b &0xc0,%d0 # is precision extended?
8525 bne.w fin_not_ext # no, so go handle dbl or sgl
8526
8527 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528 tst.b SRC_EX(%a0) # is the operand negative?
8529 bpl.b fin_denorm_done # no
8530 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8531fin_denorm_done:
8532 fmovm.x SRC(%a0),&0x80 # return result in fp0
8533 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534 bne.b fin_denorm_unfl_ena # yes
8535 rts
8536
8537#
8538# the input is an extended DENORM and underflow is enabled in the FPCR.
8539# normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540# exponent and insert back into the operand.
8541#
# The rebuilt operand is returned as the EXOP in fp1; fp0 already holds
# the default result loaded by fin_denorm_done.
8542fin_denorm_unfl_ena:
8543 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand into FP_SCR0
8544 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8545 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8546 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
8547 bsr.l norm # normalize result
8548 neg.w %d0 # new exponent = -(shft val)
8549 addi.w &0x6000,%d0 # add new bias to exponent
8550 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
8551 andi.w &0x8000,%d1 # keep old sign
8552 andi.w &0x7fff,%d0 # clear sign position
8553 or.w %d1,%d0 # concat new exp,old sign
8554 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
8555 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8556 rts
8557
8558#
8559# operand is to be rounded to single or double precision
8560#
# On entry d0.b holds only the rnd prec bits (masked with 0xc0 by the
# caller path) and is known to be non-zero.
8561fin_not_ext:
8562 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
8563 bne.b fin_dbl # not sgl, so it is dbl
8564
8565#
8566# operand is to be rounded to single precision
8567#
8568fin_sgl:
8569 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand into FP_SCR0
8570 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8571 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8572 bsr.l scale_to_zero_src # calculate scale factor
8573
# d0 = scale factor returned by scale_to_zero_src; it is compared against
# the single-precision exponent limits expressed relative to 0x3fff.
8574 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
8575 bge.w fin_sd_unfl # yes; go handle underflow
8576 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
8577 beq.w fin_sd_may_ovfl # maybe; go check
8578 blt.w fin_sd_ovfl # yes; go handle overflow
8579
8580#
8581# operand will NOT overflow or underflow when moved into the fp reg file
8582#
# Performs the move on the scaled operand in FP_SCR0 under the user's
# rounding info, then (fin_sd_normal_exit) patches the exponent of the
# result using the scale factor still held in d0.
8583fin_sd_normal:
8584 fmov.l &0x0,%fpsr # clear FPSR
8585 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8586
8587 fmov.x FP_SCR0(%a6),%fp0 # perform move
8588
8589 fmov.l %fpsr,%d1 # save FPSR
8590 fmov.l &0x0,%fpcr # clear FPCR
8591
8592 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8593
# Also entered from fin_sd_may_ovfl once overflow has been ruled out.
8594fin_sd_normal_exit:
8595 mov.l %d2,-(%sp) # save d2
8596 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8597 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8598 mov.w %d1,%d2 # make a copy
8599 andi.l &0x7fff,%d1 # strip sign
8600 sub.l %d0,%d1 # apply scale factor (exp - d0)
8601 andi.w &0x8000,%d2 # keep old sign
8602 or.w %d1,%d2 # concat old sign,new exponent
8603 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
8604 mov.l (%sp)+,%d2 # restore d2
8605 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
8606 rts
8607
8608#
8609# operand is to be rounded to double precision
8610#
# Same flow as fin_sgl, with the double-precision exponent limits.
8611fin_dbl:
8612 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand into FP_SCR0
8613 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8614 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8615 bsr.l scale_to_zero_src # calculate scale factor
8616
8617 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
8618 bge.w fin_sd_unfl # yes; go handle underflow
8619 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
8620 beq.w fin_sd_may_ovfl # maybe; go check
8621 blt.w fin_sd_ovfl # yes; go handle overflow
8622 bra.w fin_sd_normal # no; go handle normalized op
8623
8624#
8625# operand WILL underflow when moved in to the fp register file
8626#
# Sets the UNFL exception bit and the N ccode, optionally builds the EXOP
# (fin_sd_unfl_ena), then returns the default underflow result in fp0.
8627fin_sd_unfl:
8628 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629
8630 tst.b FP_SCR0_EX(%a6) # is operand negative?
8631 bpl.b fin_sd_unfl_tst # no
8632 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
8633
8634# if underflow or inexact is enabled, then go calculate the EXOP first.
8635fin_sd_unfl_tst:
8636 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
8637 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8638 bne.b fin_sd_unfl_ena # yes
8639
# d0 is not written between scale_to_zero_src and this call.
# NOTE(review): presumably unf_res consumes the scale factor in d0 -- confirm.
8640fin_sd_unfl_dis:
8641 lea FP_SCR0(%a6),%a0 # pass: result addr
8642 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8643 bsr.l unf_res # calculate default result
8644 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
8645 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8646 rts
8647
8648#
8649# operand will underflow AND underflow or inexact is enabled.
8650# Therefore, we must return the result rounded to extended precision.
8651#
# Builds the EXOP in FP_SCR1 from the scaled mantissa in FP_SCR0 with an
# exponent of (exp - scale factor + 0x6000), loads it into fp1, and then
# rejoins fin_sd_unfl_dis for the default result. d0 is left untouched.
8652fin_sd_unfl_ena:
8653 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
8654 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
8656
8657 mov.l %d2,-(%sp) # save d2
8658 mov.w %d1,%d2 # make a copy
8659 andi.l &0x7fff,%d1 # strip sign
8660 sub.l %d0,%d1 # subtract scale factor
8661 andi.w &0x8000,%d2 # extract old sign
8662 addi.l &0x6000,%d1 # add new bias
8663 andi.w &0x7fff,%d1 # keep exponent within 15 bits
8664 or.w %d1,%d2 # concat old sign,new exp
8665 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
8666 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
8667 mov.l (%sp)+,%d2 # restore d2
8668 bra.b fin_sd_unfl_dis
8669
8670#
8671# operand WILL overflow.
8672#
# The move is still performed on the scaled operand so that INEX2 and N
# are captured in USER_FPSR before the overflow bits are added.
8673fin_sd_ovfl:
8674 fmov.l &0x0,%fpsr # clear FPSR
8675 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8676
8677 fmov.x FP_SCR0(%a6),%fp0 # perform move
8678
8679 fmov.l &0x0,%fpcr # clear FPCR
8680 fmov.l %fpsr,%d1 # save FPSR
8681
8682 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8683
# Also entered from fin_sd_may_ovfl when the rounded result overflowed.
8684fin_sd_ovfl_tst:
8685 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686
8687 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
8688 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8689 bne.b fin_sd_ovfl_ena # yes
8690
8691#
8692# OVFL is not enabled; therefore, we must create the default result by
8693# calling ovf_res().
8694#
# fin_sd_ovfl_ena also branches here after it has placed the EXOP in fp1.
8695fin_sd_ovfl_dis:
8696 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8697 sne %d1 # set sign param accordingly
8698 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
8699 bsr.l ovf_res # calculate default result
8700 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8701 fmovm.x (%a0),&0x80 # return default result in fp0 (a0 as left by ovf_res)
8702 rts
8703
8704#
8705# OVFL is enabled.
8706# the INEX2 bit has already been updated by the round to the correct precision.
8707# now, round to extended(and don't alter the FPSR).
8708#
# Rewrites the exponent of the operand in FP_SCR0 as
# (exp - scale factor - 0x6000), returns it as the EXOP in fp1, and then
# rejoins fin_sd_ovfl_dis for the default result.
8709fin_sd_ovfl_ena:
8710 mov.l %d2,-(%sp) # save d2
8711 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8712 mov.l %d1,%d2 # make a copy
8713 andi.l &0x7fff,%d1 # strip sign
8714 andi.w &0x8000,%d2 # keep old sign
8715 sub.l %d0,%d1 # subtract scale factor
8716 sub.l &0x6000,%d1 # subtract bias
8717 andi.w &0x7fff,%d1 # keep exponent within 15 bits
8718 or.w %d2,%d1 # concat old sign,new exp
8719 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8720 mov.l (%sp)+,%d2 # restore d2
8721 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8722 bra.b fin_sd_ovfl_dis
8723
8724#
8725# the move in MAY overflow. so...
8726#
# Perform the move on the scaled operand and look at the magnitude of the
# rounded result: a value >= 2 means rounding carried into the exponent
# and the real result overflows.
8727fin_sd_may_ovfl:
8728 fmov.l &0x0,%fpsr # clear FPSR
8729 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8730
8731 fmov.x FP_SCR0(%a6),%fp0 # perform the move
8732
8733 fmov.l %fpsr,%d1 # save status
8734 fmov.l &0x0,%fpcr # clear FPCR
8735
8736 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8737
8738 fabs.x %fp0,%fp1 # make a copy of result
8739 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8740 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
8741
8742# no, it didn't overflow; we have correct result
8743 bra.w fin_sd_normal_exit
8744
8745##########################################################################
8746
8747#
8748# operand is not a NORM: check its optype and branch accordingly
8749#
# On entry d1.b = source optype tag (loaded from STAG in fin).
8750fin_not_norm:
8751 cmpi.b %d1,&DENORM # weed out DENORM
8752 beq.w fin_denorm
8753 cmpi.b %d1,&SNAN # weed out SNANs
8754 beq.l res_snan_1op
8755 cmpi.b %d1,&QNAN # weed out QNANs
8756 beq.l res_qnan_1op
8757
8758#
8759# do the fmove in; at this point, only possible ops are ZERO and INF.
8760# use fmov to determine ccodes.
8761# prec:mode should be zero at this point but it won't affect answer anyways.
8762#
8763 fmov.x SRC(%a0),%fp0 # do fmove in
8764 fmov.l %fpsr,%d0 # no exceptions possible
8765 rol.l &0x8,%d0 # put ccodes in lo byte (rotate top byte down)
8766 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
8767 rts
8768
8769#########################################################################
8770# XDEF **************************************************************** #
8771# fdiv(): emulates the fdiv instruction #
8772# fsdiv(): emulates the fsdiv instruction #
8773# fddiv(): emulates the fddiv instruction #
8774# #
8775# XREF **************************************************************** #
8776# scale_to_zero_src() - scale src exponent to zero #
8777# scale_to_zero_dst() - scale dst exponent to zero #
8778# unf_res() - return default underflow result #
8779# ovf_res() - return default overflow result #
8780# res_qnan() - return QNAN result #
8781# res_snan() - return SNAN result #
8782# #
8783# INPUT *************************************************************** #
8784# a0 = pointer to extended precision source operand #
8785# a1 = pointer to extended precision destination operand #
8786# d0 = rnd prec,mode #
8787# #
8788# OUTPUT ************************************************************** #
8789# fp0 = result #
8790# fp1 = EXOP (if exception occurred) #
8791# #
8792# ALGORITHM *********************************************************** #
8793# Handle NANs, infinities, and zeroes as special cases. Divide #
8794# norms/denorms into ext/sgl/dbl precision. #
8795# For norms/denorms, scale the exponents such that a divide #
8796# instruction won't cause an exception. Use the regular fdiv to #
8797# compute a result. Check if the regular operands would have taken #
8798# an exception. If so, return the default overflow/underflow result #
8799# and return the EXOP if exceptions are enabled. Else, scale the #
8800# result operand to the proper exponent. #
8801# #
8802#########################################################################
8803
# Scale-factor thresholds used by fdiv_norm. Both tables are indexed by
# the rounding precision field (entry 0 = ext, 1 = sgl, 2 = dbl) and are
# expressed relative to the 0x3fff exponent.
8804 align 0x10
8805tbl_fdiv_unfl:
8806 long 0x3fff - 0x0000 # ext_unfl
8807 long 0x3fff - 0x3f81 # sgl_unfl
8808 long 0x3fff - 0x3c01 # dbl_unfl
8809
8810tbl_fdiv_ovfl:
8811 long 0x3fff - 0x7ffe # ext overflow exponent
8812 long 0x3fff - 0x407e # sgl overflow exponent
8813 long 0x3fff - 0x43fe # dbl overflow exponent
8814
# fsdiv: entry point for fsdiv. Forces single precision in the rounding
# info held in d0, then continues in fdiv.
8815 global fsdiv
8816fsdiv:
8817 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
8818 ori.b &s_mode*0x10,%d0 # insert sgl prec
8819 bra.b fdiv
8820
# fddiv: entry point for fddiv. Forces double precision in d0 and falls
# through into fdiv.
8821 global fddiv
8822fddiv:
8823 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
8824 ori.b &d_mode*0x10,%d0 # insert dbl prec
8825
# fdiv: common entry. Saves the rounding info, builds the combined tag
# index d1 = (DTAG << 3) | STAG, and falls through into fdiv_norm when
# that index is zero.
8826 global fdiv
8827fdiv:
8828 mov.l %d0,L_SCR3(%a6) # store rnd info
8829
8830 clr.w %d1 # d1.w = 0 so it can be used as a table index
8831 mov.b DTAG(%a6),%d1 # fetch dst optype tag
8832 lsl.b &0x3,%d1 # dst tag selects the table row (8 entries each)
8833 or.b STAG(%a6),%d1 # combine src tags
8834
8835 bne.w fdiv_not_norm # optimize on non-norm input
8836
8837#
8838# DIVIDE: NORMs and DENORMs ONLY!
8839#
# Copies both operands to scratch, scales each exponent, and classifies
# the combined scale factor against the per-precision thresholds.
#   out of this block: d0 = combined scale factor,
#                      d1.w = precision index (0 ext, 1 sgl, 2 dbl)
8840fdiv_norm:
8841 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand into FP_SCR1
8842 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8843 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8844
8845 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand into FP_SCR0
8846 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8847 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8848
8849 bsr.l scale_to_zero_src # scale src exponent
8850 mov.l %d0,-(%sp) # save scale factor 1
8851
8852 bsr.l scale_to_zero_dst # scale dst exponent
8853
8854 neg.l (%sp) # (sp) = -scale1
8855 add.l %d0,(%sp) # SCALE FACTOR = scale2 - scale1 (dst - src)
8856
8857 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8858 lsr.b &0x6,%d1 # shift to lo bits
8859 mov.l (%sp)+,%d0 # load S.F.
8860 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861 ble.w fdiv_may_ovfl # maybe; go perform the divide and check
8862
8863 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864 beq.w fdiv_may_unfl # maybe
8865 bgt.w fdiv_unfl # yes; go handle underflow
8866
# No overflow or underflow is possible: divide the scaled operands
# (fp0 = FP_SCR1 / FP_SCR0) under the user's rounding info, then patch
# the result exponent with the scale factor in d0.
8867fdiv_normal:
8868 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8869
8870 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8871 fmov.l &0x0,%fpsr # clear FPSR
8872
8873 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
8874
8875 fmov.l %fpsr,%d1 # save FPSR
8876 fmov.l &0x0,%fpcr # clear FPCR
8877
8878 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8879
# Also entered from fdiv_no_ovfl and fdiv_may_unfl with the quotient in
# fp0 and the scale factor in d0.
8880fdiv_normal_exit:
8881 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
8882 mov.l %d2,-(%sp) # store d2
8883 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8884 mov.l %d1,%d2 # make a copy
8885 andi.l &0x7fff,%d1 # strip sign
8886 andi.w &0x8000,%d2 # keep old sign
8887 sub.l %d0,%d1 # apply scale factor (exp - d0)
8888 or.w %d2,%d1 # concat old sign,new exp
8889 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8890 mov.l (%sp)+,%d2 # restore d2
8891 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
8892 rts
8893
# Exponent limits checked in fdiv_may_ovfl after the divide, indexed by
# the same precision index in d1 as tbl_fdiv_ovfl. Each value is one
# above the matching 0x7ffe / 0x407e / 0x43fe used in tbl_fdiv_ovfl.
8894tbl_fdiv_ovfl2:
8895 long 0x7fff # ext
8896 long 0x407f # sgl
8897 long 0x43ff # dbl
8898
# Exit used by fdiv_may_ovfl when the divide turned out not to overflow.
8899fdiv_no_ovfl:
8900 mov.l (%sp)+,%d0 # restore scale factor
8901 bra.b fdiv_normal_exit
8902
# The scale factor says the result may overflow. Perform the divide, then
# compute the exponent the unscaled result would have and compare it with
# tbl_fdiv_ovfl2 for the current precision (d1 still holds the precision
# index set up in fdiv_norm). Falls through into fdiv_ovfl_tst on overflow.
8903fdiv_may_ovfl:
8904 mov.l %d0,-(%sp) # save scale factor
8905
8906 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8907
8908 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8909 fmov.l &0x0,%fpsr # clear FPSR
8910
8911 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8912
8913 fmov.l %fpsr,%d0 # save status
8914 fmov.l &0x0,%fpcr # clear FPCR
8915
8916 or.l %d0,USER_FPSR(%a6) # save INEX,N
8917
8918 fmovm.x &0x01,-(%sp) # save result to stack
8919 mov.w (%sp),%d0 # fetch new exponent
8920 add.l &0xc,%sp # clear result from stack
8921 andi.l &0x7fff,%d0 # strip sign
8922 sub.l (%sp),%d0 # apply scale factor (exp - saved S.F.)
8923 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # exponent vs. limit for this prec
8924 blt.b fdiv_no_ovfl # below the limit: no overflow
8925 mov.l (%sp)+,%d0 # restore scale factor; fall into fdiv_ovfl_tst
8926
# Overflow has occurred: record it in USER_FPSR, build the EXOP if OVFL or
# INEX is enabled, and return the default overflow result from ovf_res.
8927fdiv_ovfl_tst:
8928 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929
8930 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
8931 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8932 bne.b fdiv_ovfl_ena # yes
8933
# fdiv_ovfl_ena also branches here after it has placed the EXOP in fp1.
8934fdiv_ovfl_dis:
8935 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8936 sne %d1 # set sign param accordingly
8937 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
8938 bsr.l ovf_res # calculate default result
8939 or.b %d0,FPSR_CC(%a6) # set INF if applicable
8940 fmovm.x (%a0),&0x80 # return default result in fp0 (a0 as left by ovf_res)
8941 rts
8942
# OVFL or INEX is enabled: build the EXOP in fp1. For sgl/dbl the divide
# is redone first (fdiv_ovfl_ena_sd) with the rnd prec bits cleared; for
# extended the quotient already in fp0 is used as is.
8943fdiv_ovfl_ena:
8944 mov.l L_SCR3(%a6),%d1 # fetch rnd info
8945 andi.b &0xc0,%d1 # is precision extended?
8946 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
8947
# EXOP exponent = result exp - scale factor - 0x6000, masked to 15 bits.
8948fdiv_ovfl_ena_cont:
8949 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8950
8951 mov.l %d2,-(%sp) # save d2
8952 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8953 mov.w %d1,%d2 # make a copy
8954 andi.l &0x7fff,%d1 # strip sign
8955 sub.l %d0,%d1 # subtract scale factor
8956 subi.l &0x6000,%d1 # subtract bias
8957 andi.w &0x7fff,%d1 # clear sign bit
8958 andi.w &0x8000,%d2 # keep old sign
8959 or.w %d2,%d1 # concat old sign,new exp
8960 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8961 mov.l (%sp)+,%d2 # restore d2
8962 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8963 bra.b fdiv_ovfl_dis
8964
# sgl/dbl overflow with the exception enabled: redo the divide with only
# the rounding mode in the FPCR (rnd prec bits cleared), then continue
# with the EXOP construction in fdiv_ovfl_ena_cont.
8965fdiv_ovfl_ena_sd:
8966 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8967
8968 mov.l L_SCR3(%a6),%d1 # fetch rnd info
8969 andi.b &0x30,%d1 # keep rnd mode
8970 fmov.l %d1,%fpcr # set FPCR
8971
8972 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8973
8974 fmov.l &0x0,%fpcr # clear FPCR
8975 bra.b fdiv_ovfl_ena_cont
8976
# The result underflows. Divide the scaled operands with the FPCR set to
# rz_mode only, record the status, build the EXOP if UNFL or INEX is
# enabled, and return the default underflow result from unf_res.
8977fdiv_unfl:
8978 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979
8980 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8981
8982 fmov.l &rz_mode*0x10,%fpcr # set FPCR (RZ, rnd prec bits clear)
8983 fmov.l &0x0,%fpsr # clear FPSR
8984
8985 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8986
8987 fmov.l %fpsr,%d1 # save status
8988 fmov.l &0x0,%fpcr # clear FPCR
8989
8990 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8991
8992 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
8993 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8994 bne.b fdiv_unfl_ena # yes
8995
# d0 still holds the scale factor here.
# NOTE(review): presumably unf_res consumes it -- confirm.
8996fdiv_unfl_dis:
8997 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8998
8999 lea FP_SCR0(%a6),%a0 # pass: result addr
9000 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9001 bsr.l unf_res # calculate default result
9002 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
9003 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9004 rts
9005
9006#
9007# UNFL is enabled.
9008#
# Recompute the quotient in fp1 (fp0 keeps the RZ result from fdiv_unfl),
# give it an exponent of (exp - scale factor + 0x6000), and return it as
# the EXOP in fp1 before rejoining fdiv_unfl_dis.
9009fdiv_unfl_ena:
9010 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
9011
9012 mov.l L_SCR3(%a6),%d1 # fetch rnd info
9013 andi.b &0xc0,%d1 # is precision extended?
9014 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
9015
9016 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9017
9018fdiv_unfl_ena_cont:
9019 fmov.l &0x0,%fpsr # clear FPSR
9020
9021 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9022
9023 fmov.l &0x0,%fpcr # clear FPCR
9024
9025 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
9026 mov.l %d2,-(%sp) # save d2
9027 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9028 mov.l %d1,%d2 # make a copy
9029 andi.l &0x7fff,%d1 # strip sign
9030 andi.w &0x8000,%d2 # keep old sign
9031 sub.l %d0,%d1 # subtract scale factor
9032 addi.l &0x6000,%d1 # add bias
9033 andi.w &0x7fff,%d1 # keep exponent within 15 bits
9034 or.w %d2,%d1 # concat old sign,new exp
9035 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
9036 mov.l (%sp)+,%d2 # restore d2
9037 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9038 bra.w fdiv_unfl_dis
9039
# sgl/dbl underflow with the exception enabled: set the FPCR to the user's
# rounding mode only (rnd prec bits cleared) before the EXOP divide.
9040fdiv_unfl_ena_sd:
9041 mov.l L_SCR3(%a6),%d1 # fetch rnd info
9042 andi.b &0x30,%d1 # use only rnd mode
9043 fmov.l %d1,%fpcr # set FPCR
9044
9045 bra.b fdiv_unfl_ena_cont
9046
9047#
9048# the divide operation MAY underflow:
9049#
# Perform the divide on the scaled operands and compare |result| with 1:
# above 1 means no underflow, below 1 means underflow, and exactly 1 needs
# a second divide in RZ mode to see the pre-rounded value.
9050fdiv_may_unfl:
9051 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
9052
9053 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9054 fmov.l &0x0,%fpsr # clear FPSR
9055
9056 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
9057
9058 fmov.l %fpsr,%d1 # save status
9059 fmov.l &0x0,%fpcr # clear FPCR
9060
9061 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9062
9063 fabs.x %fp0,%fp1 # make a copy of result
9064 fcmp.b %fp1,&0x1 # compare |result| with 1.b
9065 fbgt.w fdiv_normal_exit # |result| > 1; no underflow occurred
9066 fblt.w fdiv_unfl # |result| < 1; underflow occurred
9067
9068#
9069# we still don't know if underflow occurred. result is ~ equal to 1. but,
9070# we don't know if the result was an underflow that rounded up to a 1
9071# or a normalized number that rounded down to a 1. so, redo the entire
9072# operation using RZ as the rounding mode to see what the pre-rounded
9073# result is. this case should be relatively rare.
9074#
9075 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
9076
9077 mov.l L_SCR3(%a6),%d1 # fetch rnd info
9078 andi.b &0xc0,%d1 # keep rnd prec
9079 ori.b &rz_mode*0x10,%d1 # insert RZ
9080
9081 fmov.l %d1,%fpcr # set FPCR
9082 fmov.l &0x0,%fpsr # clear FPSR
9083
9084 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9085
9086 fmov.l &0x0,%fpcr # clear FPCR
9087 fabs.x %fp1 # make absolute value
9088 fcmp.b %fp1,&0x1 # is |result| < 1.b?
9089 fbge.w fdiv_normal_exit # no; no underflow occurred
9090 bra.w fdiv_unfl # yes; underflow occurred
9091
9092############################################################################
9093
9094#
9095# Divide: inputs are not both normalized; what are they?
9096#
# On entry d1.w = (DTAG << 3) | STAG as built in fdiv. The table entry is
# a 16-bit offset from tbl_fdiv_op, which is then used as the jump index.
9097fdiv_not_norm:
9098 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset for this tag pair
9099 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) # jump to tbl_fdiv_op + offset
9100
9101 swbeg &48
9102tbl_fdiv_op:
# Jump table for fdiv operand-type dispatch, indexed by (DTAG << 3) | STAG.
# In the "A / B" comments A is the destination type (row) and B is the
# source type (column). Each entry is a 16-bit offset from tbl_fdiv_op;
# the last two slots of every row hold offset 0 and have no operand type
# listed for them.
9103 short fdiv_norm - tbl_fdiv_op # NORM / NORM
9104 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
9105 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
9106 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
9107 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
9108 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
9109 short tbl_fdiv_op - tbl_fdiv_op #
9110 short tbl_fdiv_op - tbl_fdiv_op #
9111
9112 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
9113 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
9114 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
9115 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
9116 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
9117 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
9118 short tbl_fdiv_op - tbl_fdiv_op #
9119 short tbl_fdiv_op - tbl_fdiv_op #
9120
9121 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
9122 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
9123 short fdiv_res_operr - tbl_fdiv_op # INF / INF
9124 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
9125 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
9126 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
9127 short tbl_fdiv_op - tbl_fdiv_op #
9128 short tbl_fdiv_op - tbl_fdiv_op #
9129
9130 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
9131 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
9132 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
9133 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
9134 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
9135 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
9136 short tbl_fdiv_op - tbl_fdiv_op #
9137 short tbl_fdiv_op - tbl_fdiv_op #
9138
9139 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
9140 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
9141 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
9142 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
9143 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
9144 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
9145 short tbl_fdiv_op - tbl_fdiv_op #
9146 short tbl_fdiv_op - tbl_fdiv_op #
9147
9148 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
9149 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
9150 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
9151 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
9152 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
9153 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
9154 short tbl_fdiv_op - tbl_fdiv_op #
9155 short tbl_fdiv_op - tbl_fdiv_op #
9156
# Stubs referenced from tbl_fdiv_op; each one only branches on to the
# handler of the same name without the "fdiv_" prefix.
9157fdiv_res_qnan:
9158 bra.l res_qnan # used for QNAN operands when no SNAN is present
9159fdiv_res_snan:
9160 bra.l res_snan # used whenever either operand is an SNAN
9161fdiv_res_operr:
9162 bra.l res_operr # used for ZERO / ZERO and INF / INF
9163
# Divide cases whose result is a zero (see tbl_fdiv_op). Returns a zero
# whose sign is the exclusive-or of the operand signs.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#   out: fp0 = +0 or -0; FPSR_CC(%a6) = Z, plus N for -0
9164 global fdiv_zero_load # global for fsgldiv
9165fdiv_zero_load:
9166 mov.b SRC_EX(%a0),%d0 # result sign is exclusive
9167 mov.b DST_EX(%a1),%d1 # or of input signs.
9168 eor.b %d0,%d1 # bit 7 of d1 = result sign
9169 bpl.b fdiv_zero_load_p # result is positive
9170 fmov.s &0x80000000,%fp0 # load a -ZERO
9171 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
9172 rts
9173fdiv_zero_load_p:
9174 fmov.s &0x00000000,%fp0 # load a +ZERO
9175 mov.b &z_bmask,FPSR_CC(%a6) # set Z
9176 rts
9177
9178#
9179# The destination was In Range and the source was a ZERO. The result,
9180# Therefore, is an INF w/ the proper sign.
9181# So, determine the sign and return a new INF (w/ the j-bit cleared).
9182#
# Also records the divide-by-zero (DZ) and accrued DZ bits in USER_FPSR.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#   out: fp0 = +INF or -INF; FPSR_CC(%a6) = INF, plus N when negative
9183 global fdiv_inf_load # global for fsgldiv
9184fdiv_inf_load:
9185 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
9186 mov.b SRC_EX(%a0),%d0 # load both signs
9187 mov.b DST_EX(%a1),%d1
9188 eor.b %d0,%d1 # bit 7 of d1 = result sign
9189 bpl.b fdiv_inf_load_p # result is positive
9190 fmov.s &0xff800000,%fp0 # make result -INF
9191 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192 rts
9193fdiv_inf_load_p:
9194 fmov.s &0x7f800000,%fp0 # make result +INF
9195 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9196 rts
9197
9198#
9199# The destination was an INF w/ an In Range or ZERO source, the result is
9200# an INF w/ the proper sign.
9201# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202# dst INF is set, then the j-bit of the result INF is also set).
9203#
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#   out: fp0 = destination INF with the sign replaced by the exclusive-or
#        of the operand signs; FPSR_CC(%a6) = INF, plus N when negative
9204 global fdiv_inf_dst # global for fsgldiv
9205fdiv_inf_dst:
9206 mov.b DST_EX(%a1),%d0 # load both signs
9207 mov.b SRC_EX(%a0),%d1
9208 eor.b %d0,%d1 # bit 7 of d1 = result sign
9209 bpl.b fdiv_inf_dst_p # result is positive
9210
9211 fmovm.x DST(%a1),&0x80 # return result in fp0
9212 fabs.x %fp0 # clear sign bit
9213 fneg.x %fp0 # set sign bit
9214 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215 rts
9216
9217fdiv_inf_dst_p:
9218 fmovm.x DST(%a1),&0x80 # return result in fp0
9219 fabs.x %fp0 # return positive INF
9220 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9221 rts
9222
9223#########################################################################
9224# XDEF **************************************************************** #
9225# fneg(): emulates the fneg instruction #
9226# fsneg(): emulates the fsneg instruction #
9227# fdneg(): emulates the fdneg instruction #
9228# #
9229# XREF **************************************************************** #
9230# norm() - normalize a denorm to provide EXOP #
9231# scale_to_zero_src() - scale sgl/dbl source exponent #
9232# ovf_res() - return default overflow result #
9233# unf_res() - return default underflow result #
9234# res_qnan_1op() - return QNAN result #
9235# res_snan_1op() - return SNAN result #
9236# #
9237# INPUT *************************************************************** #
9238# a0 = pointer to extended precision source operand #
9239# d0 = rnd prec,mode #
9240# #
9241# OUTPUT ************************************************************** #
9242# fp0 = result #
9243# fp1 = EXOP (if exception occurred) #
9244# #
9245# ALGORITHM *********************************************************** #
9246# Handle NANs, zeroes, and infinities as special cases. Separate #
9247# norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
9248# emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
9249# and an actual fneg performed to see if overflow/underflow would have #
9250# occurred. If so, return default underflow/overflow result. Else, #
9251# scale the result exponent and return result. FPSR gets set based on #
9252# the result value. #
9253# #
9254#########################################################################
9255
# fsneg: entry point for fsneg. Forces single precision in the rounding
# info held in d0, then continues in fneg.
9256 global fsneg
9257fsneg:
9258 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
9259 ori.b &s_mode*0x10,%d0 # insert sgl precision
9260 bra.b fneg
9261
# fdneg: entry point for fdneg. Forces double precision in d0 and falls
# through into fneg.
9262 global fdneg
9263fdneg:
9264 andi.b &0x30,%d0 # clear rnd prec (keep only rnd mode, bits 5-4)
9265 ori.b &d_mode*0x10,%d0 # insert dbl prec
9266
# fneg: common entry. Saves the rounding info in L_SCR3 and dispatches on
# the source operand tag; a zero tag falls through into fneg_norm.
9267 global fneg
9268fneg:
9269 mov.l %d0,L_SCR3(%a6) # store rnd info
9270 mov.b STAG(%a6),%d1 # fetch src optype tag
9271 bne.w fneg_not_norm # optimize on non-norm input
9272
9273#
9274# NEGATE SIGN : norms and denorms ONLY!
9275#
# fneg_norm is reached by fall-through from fneg when the source tag is
# zero.
9276fneg_norm:
9277 andi.b &0xc0,%d0 # is precision extended?
9278 bne.w fneg_not_ext # no; go handle sgl or dbl
9279
9280#
9281# precision selected is extended. so...we can not get an underflow
9282# or overflow because of rounding to the correct precision. so...
9283# skip the scaling and unscaling...
9284#
# The operand is copied to FP_SCR0 with its sign bit flipped and loaded
# into fp0 from there.
9285 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to FP_SCR0
9286 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9287 mov.w SRC_EX(%a0),%d0 # fetch {sgn,exp}
9288 eori.w &0x8000,%d0 # negate sign
9289 bpl.b fneg_norm_load # sign is positive
9290 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9291fneg_norm_load:
9292 mov.w %d0,FP_SCR0_EX(%a6) # store negated {sgn,exp}
9293 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9294 rts
9295
9296#
9297# for an extended precision DENORM, the UNFL exception bit is set
9298# the accrued bit is NOT set in this instance(no inexactness!)
9299#
# The negated DENORM is returned in fp0; an EXOP is produced in
# fneg_ext_unfl_ena only when UNFL is enabled in the FPCR.
9300fneg_denorm:
9301 andi.b &0xc0,%d0 # is precision extended?
9302 bne.b fneg_not_ext # no; go handle sgl or dbl
9303
9304 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305
9306 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to FP_SCR0
9307 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9308 mov.w SRC_EX(%a0),%d0 # fetch {sgn,exp}
9309 eori.w &0x8000,%d0 # negate sign
9310 bpl.b fneg_denorm_done # result is positive
9311 mov.b &neg_bmask,FPSR_CC(%a6) # result is negative; set 'N' ccode bit
9312fneg_denorm_done:
9313 mov.w %d0,FP_SCR0_EX(%a6) # store negated {sgn,exp}
9314 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9315
9316 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317 bne.b fneg_ext_unfl_ena # yes
9318 rts
9319
9320#
9321# the input is an extended DENORM and underflow is enabled in the FPCR.
9322# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323# exponent and insert back into the operand.
9324#
# Operates on the negated copy already stored in FP_SCR0 by fneg_denorm
# and returns the rebuilt operand as the EXOP in fp1.
9325fneg_ext_unfl_ena:
9326 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9327 bsr.l norm # normalize result
9328 neg.w %d0 # new exponent = -(shft val)
9329 addi.w &0x6000,%d0 # add new bias to exponent
9330 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9331 andi.w &0x8000,%d1 # keep old sign
9332 andi.w &0x7fff,%d0 # clear sign position
9333 or.w %d1,%d0 # concat old sign, new exponent
9334 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9335 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9336 rts
9337
9338#
9339# operand is either single or double; d0 = rnd prec bits (masked with 0xc0)
9340#
9341fneg_not_ext:
9342 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9343 bne.b fneg_dbl
9344
9345#
9346# operand is to be rounded to single precision
9347#
9348fneg_sgl:
9349 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0
9350 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9351 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9352 bsr.l scale_to_zero_src # calculate scale factor
9353
9354 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9355 bge.w fneg_sd_unfl # yes; go handle underflow
9356 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9357 beq.w fneg_sd_may_ovfl # maybe; go check
9358 blt.w fneg_sd_ovfl # yes; go handle overflow
9359
9360#
9361# operand will NOT overflow or underflow when moved in to the fp reg file
9362#
9363fneg_sd_normal:
9364 fmov.l &0x0,%fpsr # clear FPSR
9365 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9366
9367 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9368
9369 fmov.l %fpsr,%d1 # save FPSR
9370 fmov.l &0x0,%fpcr # clear FPCR
9371
9372 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9373
9374fneg_sd_normal_exit:
9375 mov.l %d2,-(%sp) # save d2
9376 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9377 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9378 mov.w %d1,%d2 # make a copy
9379 andi.l &0x7fff,%d1 # strip sign
9380 sub.l %d0,%d1 # subtract scale factor from exponent
9381 andi.w &0x8000,%d2 # keep old sign
9382 or.w %d1,%d2 # concat old sign,new exp
9383 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
9384 mov.l (%sp)+,%d2 # restore d2
9385 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9386 rts
9387
9388#
9389# operand is to be rounded to double precision
9390#
9391fneg_dbl:
9392 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0
9393 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9394 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9395 bsr.l scale_to_zero_src # calculate scale factor
9396
9397 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
9398 bge.b fneg_sd_unfl # yes; go handle underflow
9399 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
9400 beq.w fneg_sd_may_ovfl # maybe; go check
9401 blt.w fneg_sd_ovfl # yes; go handle overflow
9402 bra.w fneg_sd_normal # no; go handle normalized op
9403
9404#
9405# operand WILL underflow when moved in to the fp register file
9406# d0 = scale factor; FP_SCR0 = scaled operand
9407fneg_sd_unfl:
9408 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409
9410 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
9411 bpl.b fneg_sd_unfl_tst # result is positive; leave 'N' clear
9412 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
9413
9414# if underflow or inexact is enabled, go calculate EXOP first.
9415fneg_sd_unfl_tst:
9416 mov.b FPCR_ENABLE(%a6),%d1
9417 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9418 bne.b fneg_sd_unfl_ena # yes
9419
9420fneg_sd_unfl_dis:
9421 lea FP_SCR0(%a6),%a0 # pass: result addr
9422 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9423 bsr.l unf_res # calculate default result
9424 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
9425 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9426 rts
9427
9428#
9429# operand will underflow AND underflow (or inexact) is enabled.
9430# Therefore, we must return the result rounded to extended precision.
9431# the EXOP is built in FP_SCR1 so FP_SCR0 stays intact for the default result.
9432fneg_sd_unfl_ena:
9433 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
9434 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
9436
9437 mov.l %d2,-(%sp) # save d2
9438 mov.l %d1,%d2 # make a copy
9439 andi.l &0x7fff,%d1 # strip sign
9440 andi.w &0x8000,%d2 # keep old sign
9441 sub.l %d0,%d1 # subtract scale factor
9442 addi.l &0x6000,%d1 # add new bias
9443 andi.w &0x7fff,%d1 # keep exponent bits only
9444 or.w %d2,%d1 # concat new sign,new exp
9445 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
9446 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
9447 mov.l (%sp)+,%d2 # restore d2
9448 bra.b fneg_sd_unfl_dis # now go create the default result
9449
9450#
9451# operand WILL overflow.
9452# d0 = scale factor; FP_SCR0 = scaled operand
9453fneg_sd_ovfl:
9454 fmov.l &0x0,%fpsr # clear FPSR
9455 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9456
9457 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9458
9459 fmov.l &0x0,%fpcr # clear FPCR
9460 fmov.l %fpsr,%d1 # save FPSR
9461
9462 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9463
9464fneg_sd_ovfl_tst:
9465 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466
9467 mov.b FPCR_ENABLE(%a6),%d1
9468 andi.b &0x13,%d1 # is OVFL or INEX enabled?
9469 bne.b fneg_sd_ovfl_ena # yes
9470
9471#
9472# OVFL is not enabled; therefore, we must create the default result by
9473# calling ovf_res().
9474#
9475fneg_sd_ovfl_dis:
9476 btst &neg_bit,FPSR_CC(%a6) # is result negative?
9477 sne %d1 # set sign param accordingly
9478 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
9479 bsr.l ovf_res # calculate default result
9480 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
9481 fmovm.x (%a0),&0x80 # return default result in fp0
9482 rts
9483
9484#
9485# OVFL (or INEX) is enabled.
9486# the INEX2 bit has already been updated by the round to the correct precision.
9487# now, round to extended(and don't alter the FPSR).
9488#
9489fneg_sd_ovfl_ena:
9490 mov.l %d2,-(%sp) # save d2
9491 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9492 mov.l %d1,%d2 # make a copy
9493 andi.l &0x7fff,%d1 # strip sign
9494 andi.w &0x8000,%d2 # keep old sign
9495 sub.l %d0,%d1 # subtract scale factor
9496 subi.l &0x6000,%d1 # subtract bias
9497 andi.w &0x7fff,%d1 # keep exponent bits only
9498 or.w %d2,%d1 # concat sign,exp
9499 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
9500 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9501 mov.l (%sp)+,%d2 # restore d2
9502 bra.b fneg_sd_ovfl_dis # now go create the default result
9503
9504#
9505# the move in MAY overflow. so... do the op and inspect the rounded result.
9506#
9507fneg_sd_may_ovfl:
9508 fmov.l &0x0,%fpsr # clear FPSR
9509 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9510
9511 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9512
9513 fmov.l %fpsr,%d1 # save status
9514 fmov.l &0x0,%fpcr # clear FPCR
9515
9516 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9517
9518 fabs.x %fp0,%fp1 # make a copy of result
9519 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
9520 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
9521
9522# no, it didn't overflow; we have correct result
9523 bra.w fneg_sd_normal_exit
9524
9525##########################################################################
9526
9527#
9528# input is not normalized; what is it? (d1 = source operand tag)
9529#
9530fneg_not_norm:
9531 cmpi.b %d1,&DENORM # weed out DENORM
9532 beq.w fneg_denorm
9533 cmpi.b %d1,&SNAN # weed out SNAN
9534 beq.l res_snan_1op
9535 cmpi.b %d1,&QNAN # weed out QNAN
9536 beq.l res_qnan_1op
9537
9538#
9539# do the fneg; at this point, only possible ops are ZERO and INF.
9540# use fneg to determine ccodes.
9541# prec:mode should be zero at this point but it won't affect answer anyways.
9542#
9543 fneg.x SRC_EX(%a0),%fp0 # do fneg
9544 fmov.l %fpsr,%d0 # fetch resulting FPSR
9545 rol.l &0x8,%d0 # put ccodes in lo byte
9546 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
9547 rts
9548
9549#########################################################################
9550# XDEF **************************************************************** #
9551# ftst(): emulates the ftest instruction #
9552# #
9553# XREF **************************************************************** #
9554# res_{s,q}nan_1op() - set NAN result for monadic instruction #
9555# #
9556# INPUT *************************************************************** #
9557# a0 = pointer to extended precision source operand #
9558# #
9559# OUTPUT ************************************************************** #
9560# none #
9561# #
9562# ALGORITHM *********************************************************** #
9563# Check the source operand tag (STAG) and set the FPSR condition #
9564# codes according to the operand type and sign. #
9565# #
9566#########################################################################
9567
9568 global ftst # ftst: set FPSR ccodes from the src operand at (a0)
9569ftst:
9570 mov.b STAG(%a6),%d1 # fetch source operand tag
9571 bne.b ftst_not_norm # optimize on non-norm input
9572
9573#
9574# Norm:
9575#
9576ftst_norm:
9577 tst.b SRC_EX(%a0) # is operand negative?
9578 bmi.b ftst_norm_m # yes
9579 rts # positive norm: no ccode bit is written
9580ftst_norm_m:
9581 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9582 rts
9583
9584#
9585# input is not normalized; what is it? (d1 = source operand tag)
9586#
9587ftst_not_norm:
9588 cmpi.b %d1,&ZERO # weed out ZERO
9589 beq.b ftst_zero
9590 cmpi.b %d1,&INF # weed out INF
9591 beq.b ftst_inf
9592 cmpi.b %d1,&SNAN # weed out SNAN
9593 beq.l res_snan_1op
9594 cmpi.b %d1,&QNAN # weed out QNAN
9595 beq.l res_qnan_1op
9596
9597#
9598# Denorm: (fall through; handled the same way as a norm)
9599#
9600ftst_denorm:
9601 tst.b SRC_EX(%a0) # is operand negative?
9602 bmi.b ftst_denorm_m # yes
9603 rts # positive denorm: no ccode bit is written
9604ftst_denorm_m:
9605 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9606 rts
9607
9608#
9609# Infinity: set 'I', plus 'N' when the operand is negative.
9610#
9611ftst_inf:
9612 tst.b SRC_EX(%a0) # is operand negative?
9613 bmi.b ftst_inf_m # yes
9614ftst_inf_p:
9615 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9616 rts
9617ftst_inf_m:
9618 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619 rts
9620
9621#
9622# Zero: set 'Z', plus 'N' when the operand is negative.
9623#
9624ftst_zero:
9625 tst.b SRC_EX(%a0) # is operand negative?
9626 bmi.b ftst_zero_m # yes
9627ftst_zero_p:
9628 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9629 rts
9630ftst_zero_m:
9631 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9632 rts
9633
9634#########################################################################
9635# XDEF **************************************************************** #
9636# fint(): emulates the fint instruction #
9637# #
9638# XREF **************************************************************** #
9639# res_{s,q}nan_1op() - set NAN result for monadic operation #
9640# #
9641# INPUT *************************************************************** #
9642# a0 = pointer to extended precision source operand #
9643# d0 = round precision/mode #
9644# #
9645# OUTPUT ************************************************************** #
9646# fp0 = result #
9647# #
9648# ALGORITHM *********************************************************** #
9649# Separate according to operand type. Unnorms don't pass through #
9650# here. For norms, load the rounding mode/prec, execute a "fint", then #
9651# store the resulting FPSR bits. #
9652# For denorms, force the j-bit to a one and do the same as for #
9653# norms. Denorms are so low that the answer will either be a zero or a #
9654# one. #
9655# For zeroes/infs/NANs, return the same while setting the FPSR #
9656# as appropriate. #
9657# #
9658#########################################################################
9659
9660 global fint # fint: src operand at (a0), d0 = rnd prec/mode; result in fp0
9661fint:
9662 mov.b STAG(%a6),%d1 # fetch source operand tag
9663 bne.b fint_not_norm # optimize on non-norm input
9664
9665#
9666# Norm: (also entered from fint_denorm with a0 -> FP_SCR0)
9667#
9668fint_norm:
9669 andi.b &0x30,%d0 # set prec = ext
9670
9671 fmov.l %d0,%fpcr # set FPCR
9672 fmov.l &0x0,%fpsr # clear FPSR
9673
9674 fint.x SRC(%a0),%fp0 # execute fint
9675
9676 fmov.l &0x0,%fpcr # clear FPCR
9677 fmov.l %fpsr,%d0 # save FPSR
9678 or.l %d0,USER_FPSR(%a6) # set exception bits
9679
9680 rts
9681
9682#
9683# input is not normalized; what is it? (d1 = source operand tag)
9684#
9685fint_not_norm:
9686 cmpi.b %d1,&ZERO # weed out ZERO
9687 beq.b fint_zero
9688 cmpi.b %d1,&INF # weed out INF
9689 beq.b fint_inf
9690 cmpi.b %d1,&DENORM # weed out DENORM
9691 beq.b fint_denorm
9692 cmpi.b %d1,&SNAN # weed out SNAN
9693 beq.l res_snan_1op
9694 bra.l res_qnan_1op # anything left is treated as QNAN
9695
9696#
9697# Denorm:
9698#
9699# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700# also, the INEX2 and AINEX exception bits will be set.
9701# so, we could either set these manually or force the DENORM
9702# to a very small NORM and ship it to the NORM routine.
9703# I do the latter.
9704# NOTE(review): only the top mantissa byte of FP_SCR0 is written here; the rest is left as-is.
9705fint_denorm:
9706 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9708 lea FP_SCR0(%a6),%a0 # a0 now points at the forced NORM
9709 bra.b fint_norm
9710
9711#
9712# Zero: return a zero of the same sign and set 'Z' (plus 'N' if negative).
9713#
9714fint_zero:
9715 tst.b SRC_EX(%a0) # is ZERO negative?
9716 bmi.b fint_zero_m # yes
9717fint_zero_p:
9718 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9719 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9720 rts
9721fint_zero_m:
9722 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9723 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724 rts
9725
9726#
9727# Infinity: return the operand itself and set 'I' (plus 'N' if negative).
9728#
9729fint_inf:
9730 fmovm.x SRC(%a0),&0x80 # return result in fp0
9731 tst.b SRC_EX(%a0) # is INF negative?
9732 bmi.b fint_inf_m # yes
9733fint_inf_p:
9734 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9735 rts
9736fint_inf_m:
9737 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738 rts
9739
9740#########################################################################
9741# XDEF **************************************************************** #
9742# fintrz(): emulates the fintrz instruction #
9743# #
9744# XREF **************************************************************** #
9745# res_{s,q}nan_1op() - set NAN result for monadic operation #
9746# #
9747# INPUT *************************************************************** #
9748# a0 = pointer to extended precision source operand #
9749# d0 = round precision/mode #
9750# #
9751# OUTPUT ************************************************************** #
9752# fp0 = result #
9753# #
9754# ALGORITHM *********************************************************** #
9755# Separate according to operand type. Unnorms don't pass through #
9756# here. For norms, execute a "fintrz" (no rounding mode/prec is #
9757# loaded), then store the resulting FPSR bits. #
9758# For denorms, force the j-bit to a one and do the same as for #
9759# norms. Denorms are so low that the answer will always be a #
9760# zero. #
9761# For zeroes/infs/NANs, return the same while setting the FPSR #
9762# as appropriate. #
9763# #
9764#########################################################################
9765
9766 global fintrz # fintrz: src operand at (a0); result in fp0
9767fintrz:
9768 mov.b STAG(%a6),%d1 # fetch source operand tag
9769 bne.b fintrz_not_norm # optimize on non-norm input
9770
9771#
9772# Norm: (also entered from fintrz_denorm with a0 -> FP_SCR0)
9773#
9774fintrz_norm:
9775 fmov.l &0x0,%fpsr # clear FPSR
9776
9777 fintrz.x SRC(%a0),%fp0 # execute fintrz
9778
9779 fmov.l %fpsr,%d0 # save FPSR
9780 or.l %d0,USER_FPSR(%a6) # set exception bits
9781
9782 rts
9783
9784#
9785# input is not normalized; what is it? (d1 = source operand tag)
9786#
9787fintrz_not_norm:
9788 cmpi.b %d1,&ZERO # weed out ZERO
9789 beq.b fintrz_zero
9790 cmpi.b %d1,&INF # weed out INF
9791 beq.b fintrz_inf
9792 cmpi.b %d1,&DENORM # weed out DENORM
9793 beq.b fintrz_denorm
9794 cmpi.b %d1,&SNAN # weed out SNAN
9795 beq.l res_snan_1op
9796 bra.l res_qnan_1op # anything left is treated as QNAN
9797
9798#
9799# Denorm:
9800#
9801# for DENORMs, the result will be (+/-)ZERO.
9802# also, the INEX2 and AINEX exception bits will be set.
9803# so, we could either set these manually or force the DENORM
9804# to a very small NORM and ship it to the NORM routine.
9805# I do the latter.
9806# NOTE(review): only the top mantissa byte of FP_SCR0 is written here; the rest is left as-is.
9807fintrz_denorm:
9808 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9810 lea FP_SCR0(%a6),%a0 # a0 now points at the forced NORM
9811 bra.b fintrz_norm
9812
9813#
9814# Zero: return a zero of the same sign and set 'Z' (plus 'N' if negative).
9815#
9816fintrz_zero:
9817 tst.b SRC_EX(%a0) # is ZERO negative?
9818 bmi.b fintrz_zero_m # yes
9819fintrz_zero_p:
9820 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9821 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9822 rts
9823fintrz_zero_m:
9824 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9825 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826 rts
9827
9828#
9829# Infinity: return the operand itself and set 'I' (plus 'N' if negative).
9830#
9831fintrz_inf:
9832 fmovm.x SRC(%a0),&0x80 # return result in fp0
9833 tst.b SRC_EX(%a0) # is INF negative?
9834 bmi.b fintrz_inf_m # yes
9835fintrz_inf_p:
9836 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9837 rts
9838fintrz_inf_m:
9839 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840 rts
9841
9842#########################################################################
9843# XDEF **************************************************************** #
9844# fabs(): emulates the fabs instruction #
9845# fsabs(): emulates the fsabs instruction #
9846# fdabs(): emulates the fdabs instruction #
9847# #
9848# XREF **************************************************************** #
9849# norm() - normalize denorm mantissa to provide EXOP #
9850# scale_to_zero_src() - make exponent. = 0; get scale factor #
9851# unf_res() - calculate underflow result #
9852# ovf_res() - calculate overflow result #
9853# res_{s,q}nan_1op() - set NAN result for monadic operation #
9854# #
9855# INPUT *************************************************************** #
9856# a0 = pointer to extended precision source operand #
9857# d0 = rnd precision/mode #
9858# #
9859# OUTPUT ************************************************************** #
9860# fp0 = result #
9861# fp1 = EXOP (if exception occurred) #
9862# #
9863# ALGORITHM *********************************************************** #
9864# Handle NANs, infinities, and zeroes as special cases. Divide #
9865# norms into extended, single, and double precision. #
9866# Simply clear sign for extended precision norm. Ext prec denorm #
9867# gets an EXOP created for it since it's an underflow. #
9868# Double and single precision can overflow and underflow. First, #
9869# scale the operand such that the exponent is zero. Perform an "fabs" #
9870# using the correct rnd mode/prec. Check to see if the original #
9871# exponent would take an exception. If so, use unf_res() or ovf_res() #
9872# to calculate the default result. Also, create the EXOP for the #
9873# exceptional case. If no exception should occur, insert the correct #
9874# result exponent and return. #
9875# Unnorms don't pass through here. #
9876# #
9877#########################################################################
9878
9879 global fsabs # fsabs: fabs with the rounding precision forced to single
9880fsabs:
9881 andi.b &0x30,%d0 # keep bits 5:4; clear rnd prec
9882 ori.b &s_mode*0x10,%d0 # insert sgl precision
9883 bra.b fabs
9884
9885 global fdabs # fdabs: fabs with the rounding precision forced to double
9886fdabs:
9887 andi.b &0x30,%d0 # keep bits 5:4; clear rnd prec
9888 ori.b &d_mode*0x10,%d0 # insert dbl precision
9889
9890 global fabs # fabs: absolute value of src operand at (a0); d0 = rnd prec/mode
9891fabs:
9892 mov.l %d0,L_SCR3(%a6) # store rnd info
9893 mov.b STAG(%a6),%d1 # fetch source operand tag
9894 bne.w fabs_not_norm # optimize on non-norm input
9895
9896#
9897# ABSOLUTE VALUE: norms and denorms ONLY!
9898#
9899fabs_norm:
9900 andi.b &0xc0,%d0 # is precision extended?
9901 bne.b fabs_not_ext # no; go handle sgl or dbl
9902
9903#
9904# precision selected is extended. so...we can not get an underflow
9905# or overflow because of rounding to the correct precision. so...
9906# skip the scaling and unscaling...
9907#
9908 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi mantissa unchanged
9909 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo mantissa unchanged
9910 mov.w SRC_EX(%a0),%d1 # fetch {sgn,exp}
9911 bclr &15,%d1 # force absolute value
9912 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
9913 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9914 rts
9915
9916#
9917# for an extended precision DENORM, the UNFL exception bit is set
9918# the accrued bit is NOT set in this instance(no inexactness!)
9919# on entry d0 still holds the rnd prec/mode; sgl/dbl precision is rerouted.
9920fabs_denorm:
9921 andi.b &0xc0,%d0 # is precision extended?
9922 bne.b fabs_not_ext # no; go handle sgl or dbl
9923
9924 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925
9926 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi mantissa unchanged
9927 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo mantissa unchanged
9928 mov.w SRC_EX(%a0),%d0 # fetch {sgn,exp}
9929 bclr &15,%d0 # clear sign
9930 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
9931
9932 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9933
9934 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935 bne.b fabs_ext_unfl_ena # yes; also build the EXOP
9936 rts
9937
9938#
9939# the input is an extended DENORM and underflow is enabled in the FPCR.
9940# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941# exponent and insert back into the operand. the EXOP is returned in fp1.
9942#
9943fabs_ext_unfl_ena:
9944 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9945 bsr.l norm # normalize result
9946 neg.w %d0 # new exponent = -(shft val)
9947 addi.w &0x6000,%d0 # add new bias to exponent
9948 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9949 andi.w &0x8000,%d1 # keep old sign
9950 andi.w &0x7fff,%d0 # clear sign position
9951 or.w %d1,%d0 # concat old sign, new exponent
9952 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9953 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9954 rts
9955
9956#
9957# operand is either single or double; d0 = rnd prec bits (masked with 0xc0)
9958#
9959fabs_not_ext:
9960 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9961 bne.b fabs_dbl
9962
9963#
9964# operand is to be rounded to single precision
9965#
9966fabs_sgl:
9967 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0
9968 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9969 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9970 bsr.l scale_to_zero_src # calculate scale factor
9971
9972 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9973 bge.w fabs_sd_unfl # yes; go handle underflow
9974 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9975 beq.w fabs_sd_may_ovfl # maybe; go check
9976 blt.w fabs_sd_ovfl # yes; go handle overflow
9977
9978#
9979# operand will NOT overflow or underflow when moved in to the fp reg file
9980#
9981fabs_sd_normal:
9982 fmov.l &0x0,%fpsr # clear FPSR
9983 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9984
9985 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
9986
9987 fmov.l %fpsr,%d1 # save FPSR
9988 fmov.l &0x0,%fpcr # clear FPCR
9989
9990 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9991
9992fabs_sd_normal_exit:
9993 mov.l %d2,-(%sp) # save d2
9994 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9995 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9996 mov.l %d1,%d2 # make a copy
9997 andi.l &0x7fff,%d1 # strip sign
9998 sub.l %d0,%d1 # subtract scale factor from exponent
9999 andi.w &0x8000,%d2 # keep old sign
10000 or.w %d1,%d2 # concat old sign,new exp
10001 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
10002 mov.l (%sp)+,%d2 # restore d2
10003 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10004 rts
10005
10006#
10007# operand is to be rounded to double precision
10008#
10009fabs_dbl:
10010 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0
10011 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10012 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10013 bsr.l scale_to_zero_src # calculate scale factor
10014
10015 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
10016 bge.b fabs_sd_unfl # yes; go handle underflow
10017 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
10018 beq.w fabs_sd_may_ovfl # maybe; go check
10019 blt.w fabs_sd_ovfl # yes; go handle overflow
10020 bra.w fabs_sd_normal # no; go handle normalized op
10021
10022#
10023# operand WILL underflow when moved in to the fp register file
10024# d0 = scale factor; FP_SCR0 = scaled operand
10025fabs_sd_unfl:
10026 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027
10028 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
10029
10030# if underflow or inexact is enabled, go calculate EXOP first.
10031 mov.b FPCR_ENABLE(%a6),%d1
10032 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10033 bne.b fabs_sd_unfl_ena # yes
10034
10035fabs_sd_unfl_dis:
10036 lea FP_SCR0(%a6),%a0 # pass: result addr
10037 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10038 bsr.l unf_res # calculate default result
10039 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
10040 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10041 rts
10042
10043#
10044# operand will underflow AND underflow (or inexact) is enabled.
10045# Therefore, we must return the result rounded to extended precision.
10046# the EXOP is built in FP_SCR1 so FP_SCR0 stays intact for the default result.
10047fabs_sd_unfl_ena:
10048 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
10049 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
10051
10052 mov.l %d2,-(%sp) # save d2
10053 mov.l %d1,%d2 # make a copy
10054 andi.l &0x7fff,%d1 # strip sign
10055 andi.w &0x8000,%d2 # keep old sign
10056 sub.l %d0,%d1 # subtract scale factor
10057 addi.l &0x6000,%d1 # add new bias
10058 andi.w &0x7fff,%d1 # keep exponent bits only
10059 or.w %d2,%d1 # concat new sign,new exp
10060 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
10061 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
10062 mov.l (%sp)+,%d2 # restore d2
10063 bra.b fabs_sd_unfl_dis # now go create the default result
10064
10065#
10066# operand WILL overflow.
10067# d0 = scale factor; FP_SCR0 = scaled operand
10068fabs_sd_ovfl:
10069 fmov.l &0x0,%fpsr # clear FPSR
10070 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10071
10072 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10073
10074 fmov.l &0x0,%fpcr # clear FPCR
10075 fmov.l %fpsr,%d1 # save FPSR
10076
10077 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10078
10079fabs_sd_ovfl_tst:
10080 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081
10082 mov.b FPCR_ENABLE(%a6),%d1
10083 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10084 bne.b fabs_sd_ovfl_ena # yes
10085
10086#
10087# OVFL is not enabled; therefore, we must create the default result by
10088# calling ovf_res().
10089#
10090fabs_sd_ovfl_dis:
10091 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10092 sne %d1 # set sign param accordingly
10093 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
10094 bsr.l ovf_res # calculate default result
10095 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10096 fmovm.x (%a0),&0x80 # return default result in fp0
10097 rts
10098
10099#
10100# OVFL (or INEX) is enabled.
10101# the INEX2 bit has already been updated by the round to the correct precision.
10102# now, round to extended(and don't alter the FPSR).
10103#
10104fabs_sd_ovfl_ena:
10105 mov.l %d2,-(%sp) # save d2
10106 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10107 mov.l %d1,%d2 # make a copy
10108 andi.l &0x7fff,%d1 # strip sign
10109 andi.w &0x8000,%d2 # keep old sign
10110 sub.l %d0,%d1 # subtract scale factor
10111 subi.l &0x6000,%d1 # subtract bias
10112 andi.w &0x7fff,%d1 # keep exponent bits only
10113 or.w %d2,%d1 # concat sign,exp
10114 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10115 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10116 mov.l (%sp)+,%d2 # restore d2
10117 bra.b fabs_sd_ovfl_dis # now go create the default result
10118
10119#
10120# the move in MAY overflow. so... do the op and inspect the rounded result.
10121#
10122fabs_sd_may_ovfl:
10123 fmov.l &0x0,%fpsr # clear FPSR
10124 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10125
10126 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10127
10128 fmov.l %fpsr,%d1 # save status
10129 fmov.l &0x0,%fpcr # clear FPCR
10130
10131 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10132
10133 fabs.x %fp0,%fp1 # make a copy of result
10134 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10135 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
10136
10137# no, it didn't overflow; we have correct result
10138 bra.w fabs_sd_normal_exit
10139
10140##########################################################################
10141
10142#
10143# input is not normalized; what is it? (d1 = source operand tag)
10144#
10145fabs_not_norm:
10146 cmpi.b %d1,&DENORM # weed out DENORM
10147 beq.w fabs_denorm
10148 cmpi.b %d1,&SNAN # weed out SNAN
10149 beq.l res_snan_1op
10150 cmpi.b %d1,&QNAN # weed out QNAN
10151 beq.l res_qnan_1op
10152
10153 fabs.x SRC(%a0),%fp0 # force absolute value
10154
10155 cmpi.b %d1,&INF # weed out INF
10156 beq.b fabs_inf
10157fabs_zero:
10158 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10159 rts
10160fabs_inf:
10161 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10162 rts
10163
10164#########################################################################
10165# XDEF **************************************************************** #
10166# fcmp(): fp compare op routine #
10167# #
10168# XREF **************************************************************** #
10169# res_qnan() - return QNAN result #
10170# res_snan() - return SNAN result #
10171# #
10172# INPUT *************************************************************** #
10173# a0 = pointer to extended precision source operand #
10174# a1 = pointer to extended precision destination operand #
10175# d0 = round prec/mode #
10176# #
10177# OUTPUT ************************************************************** #
10178# None #
10179# #
10180# ALGORITHM *********************************************************** #
10181# Handle NANs and denorms as special cases. For everything else, #
10182# just use the actual fcmp instruction to produce the correct condition #
10183# codes. #
10184# #
10185#########################################################################
10186
10187 global fcmp # fcmp: compare dst at (a1) with src at (a0); only ccodes are produced
10188fcmp:
10189 clr.w %d1 # d1.w becomes the dispatch index
10190 mov.b DTAG(%a6),%d1 # fetch dst operand tag
10191 lsl.b &0x3,%d1 # dst tag selects the table row (8 entries each)
10192 or.b STAG(%a6),%d1 # src tag selects the entry within the row
10193 bne.b fcmp_not_norm # optimize on non-norm input
10194
10195#
10196# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10197#
10198fcmp_norm:
10199 fmovm.x DST(%a1),&0x80 # load dst op
10200
10201 fcmp.x %fp0,SRC(%a0) # do compare
10202
10203 fmov.l %fpsr,%d0 # save FPSR
10204 rol.l &0x8,%d0 # extract ccode bits
10205 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
10206
10207 rts
10208
10209#
10210# fcmp: inputs are not both normalized; what are they?
10211# d1.w = (dst tag << 3) | src tag; table comments read "dst - src".
10212fcmp_not_norm:
10213 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch handler offset from table
10214 jmp (tbl_fcmp_op.b,%pc,%d1.w*1) # jump to tbl_fcmp_op + offset
10215
10216 swbeg &48
10217tbl_fcmp_op:
10218 short fcmp_norm - tbl_fcmp_op # NORM - NORM
10219 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
10220 short fcmp_norm - tbl_fcmp_op # NORM - INF
10221 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
10222 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
10223 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
10224 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10225 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10226
10227 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
10228 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
10229 short fcmp_norm - tbl_fcmp_op # ZERO - INF
10230 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
10231 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
10232 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
10233 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10234 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10235
10236 short fcmp_norm - tbl_fcmp_op # INF - NORM
10237 short fcmp_norm - tbl_fcmp_op # INF - ZERO
10238 short fcmp_norm - tbl_fcmp_op # INF - INF
10239 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
10240 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
10241 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
10242 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10243 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10244
10245 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
10246 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
10247 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
10248 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
10249 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
10250 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
10251 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10252 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10253
10254 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
10255 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
10256 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
10257 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
10258 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
10259 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
10260 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10261 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10262
10263 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
10264 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
10265 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
10266 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
10267 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
10268 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
10269 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10270 short tbl_fcmp_op - tbl_fcmp_op # filler: offset 0 (presumably unused tag)
10271
10272# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10274fcmp_res_qnan:
10275 bsr.l res_qnan # build the QNAN result
10276 andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 of the ccode byte (the 'N' bit)
10277 rts
10278fcmp_res_snan:
10279 bsr.l res_snan # build the SNAN result
10280 andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 of the ccode byte (the 'N' bit)
10281 rts
10282
10283#
10284# DENORMs are a little more difficult.
10285# If you have 2 DENORMs, then you can just force the j-bit of each to a one
10286# and use the fcmp_norm routine.
10287# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288# and use the fcmp_norm routine.
10289# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291# (1) signs are (+) and the DENORM is the dst or
10292# (2) signs are (-) and the DENORM is the src
10293#
10294
10295fcmp_dnrm_s: # src is a DENORM: compare using a copy with the j-bit forced to one
10296 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src {sgn,exp} to scratch
10297 mov.l SRC_HI(%a0),%d0
10298 bset &31,%d0 # DENORM src; make into small norm
10299 mov.l %d0,FP_SCR0_HI(%a6)
10300 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10301 lea FP_SCR0(%a6),%a0 # a0 now points at the modified src copy
10302 bra.w fcmp_norm # finish with the regular compare path
10303
10304fcmp_dnrm_d: # dst is a DENORM: compare using a copy with the j-bit forced to one
10305 mov.l DST_EX(%a1),FP_SCR0_EX(%a6) # copy dst {sgn,exp}; NOTE(review): long move here but word move in fcmp_dnrm_s/fcmp_dnrm_sd - confirm the extra word is don't-care
10306 mov.l DST_HI(%a1),%d0
10307 bset &31,%d0 # DENORM dst; make into small norm
10308 mov.l %d0,FP_SCR0_HI(%a6)
10309 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
10310 lea FP_SCR0(%a6),%a1 # a1 now points at the modified dst copy
10311 bra.w fcmp_norm # finish with the regular compare path
10312
10313fcmp_dnrm_sd: # both operands are DENORMs: force the j-bit in copies of each
10314 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # dst copy goes to FP_SCR1
10315 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # src copy goes to FP_SCR0
10316 mov.l DST_HI(%a1),%d0
10317 bset &31,%d0 # DENORM dst; make into small norm
10318 mov.l %d0,FP_SCR1_HI(%a6)
10319 mov.l SRC_HI(%a0),%d0
10320 bset &31,%d0 # DENORM src; make into small norm
10321 mov.l %d0,FP_SCR0_HI(%a6)
10322 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10323 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10324 lea FP_SCR1(%a6),%a1 # a1 = modified dst copy
10325 lea FP_SCR0(%a6),%a0 # a0 = modified src copy
10326 bra.w fcmp_norm # finish with the regular compare path
10327
10328fcmp_nrm_dnrm: # the DENORM operand is the src (opposite signs are handed to fcmp_dnrm_s)
10329 mov.b SRC_EX(%a0),%d0 # determine if like signs
10330 mov.b DST_EX(%a1),%d1
10331 eor.b %d0,%d1 # bit 7 set if the sign bits differ
10332 bmi.w fcmp_dnrm_s # opposite signs; use the fcmp_norm path
10333
10334# signs are the same, so must determine the answer ourselves.
10335 tst.b %d0 # is src op negative?
10336 bmi.b fcmp_nrm_dnrm_m # yes
10337 rts # no; return without writing FPSR_CC
10338fcmp_nrm_dnrm_m:
10339 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10340 rts
10341
10342fcmp_dnrm_nrm: # DENORM dst - NORM src (see table entry "DENORM - NORM")
10343 mov.b SRC_EX(%a0),%d0 # determine if like signs
10344 mov.b DST_EX(%a1),%d1
10345 eor.b %d0,%d1 # bit 7 set if the sign bits differ
10346 bmi.w fcmp_dnrm_d # opposite signs; use the fcmp_norm path
10347
10348# signs are the same, so must determine the answer ourselves.
10349 tst.b %d0 # is src op negative?
10350 bpl.b fcmp_dnrm_nrm_m # no
10351 rts # yes; return without writing FPSR_CC
10352fcmp_dnrm_nrm_m:
10353 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10354 rts
10355
10356#########################################################################
10357# XDEF **************************************************************** #
10358# fsglmul(): emulates the fsglmul instruction #
10359# #
10360# XREF **************************************************************** #
10361# scale_to_zero_src() - scale src exponent to zero #
10362# scale_to_zero_dst() - scale dst exponent to zero #
10363# unf_res4() - return default underflow result for sglop #
10364# ovf_res() - return default overflow result #
10365# res_qnan() - return QNAN result #
10366# res_snan() - return SNAN result #
10367# #
10368# INPUT *************************************************************** #
10369# a0 = pointer to extended precision source operand #
10370# a1 = pointer to extended precision destination operand #
10371# d0 rnd prec,mode #
10372# #
10373# OUTPUT ************************************************************** #
10374# fp0 = result #
10375# fp1 = EXOP (if exception occurred) #
10376# #
10377# ALGORITHM *********************************************************** #
10378# Handle NANs, infinities, and zeroes as special cases. Divide #
10379# norms/denorms into ext/sgl/dbl precision. #
10380# For norms/denorms, scale the exponents such that a multiply #
10381# instruction won't cause an exception. Use the regular fsglmul to #
10382# compute a result. Check if the regular operands would have taken #
10383# an exception. If so, return the default overflow/underflow result #
10384# and return the EXOP if exceptions are enabled. Else, scale the #
10385# result operand to the proper exponent. #
10386# #
10387#########################################################################
10388
10389 global fsglmul
10390fsglmul: # in: a0 = src ptr, a1 = dst ptr, d0 = rnd prec,mode
10391 mov.l %d0,L_SCR3(%a6) # store rnd info
10392
10393 clr.w %d1
10394 mov.b DTAG(%a6),%d1 # fetch dst tag
10395 lsl.b &0x3,%d1 # dst tag * 8 (8 table entries per dst tag)
10396 or.b STAG(%a6),%d1 # combine src tags
10397
10398 bne.w fsglmul_not_norm # optimize on non-norm input
10399
10400fsglmul_norm: # copy both operands to scratch, scale them, classify by scale factor
10401 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10402 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10403 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10404
10405 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10406 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10407 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10408
10409 bsr.l scale_to_zero_src # scale exponent
10410 mov.l %d0,-(%sp) # save scale factor 1
10411
10412 bsr.l scale_to_zero_dst # scale dst exponent
10413
10414 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
10415
10416 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
10417 beq.w fsglmul_may_ovfl # result may rnd to overflow
10418 blt.w fsglmul_ovfl # result will overflow
10419
10420 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
10421 beq.w fsglmul_may_unfl # result may rnd to no unfl
10422 bgt.w fsglmul_unfl # result will underflow
10423
10424fsglmul_normal: # no overflow/underflow possible: multiply, then undo the scaling
10425 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10426
10427 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10428 fmov.l &0x0,%fpsr # clear FPSR
10429
10430 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10431
10432 fmov.l %fpsr,%d1 # save status
10433 fmov.l &0x0,%fpcr # clear FPCR
10434
10435 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10436
10437fsglmul_normal_exit: # in: fp0 = scaled result, d0 = scale factor
10438 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10439 mov.l %d2,-(%sp) # save d2
10440 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10441 mov.l %d1,%d2 # make a copy
10442 andi.l &0x7fff,%d1 # strip sign
10443 andi.w &0x8000,%d2 # keep old sign
10444 sub.l %d0,%d1 # undo scaling: exp - scale factor
10445 or.w %d2,%d1 # concat old sign,new exp
10446 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10447 mov.l (%sp)+,%d2 # restore d2
10448 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10449 rts
10450
10451fsglmul_ovfl: # scale factor says the result will overflow
10452 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10453
10454 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10455 fmov.l &0x0,%fpsr # clear FPSR
10456
10457 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10458
10459 fmov.l %fpsr,%d1 # save status
10460 fmov.l &0x0,%fpcr # clear FPCR
10461
10462 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10463
10464fsglmul_ovfl_tst:
10465
10466# save setting this until now because this is where fsglmul_may_ovfl may jump in
10467 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468
10469 mov.b FPCR_ENABLE(%a6),%d1
10470 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10471 bne.b fsglmul_ovfl_ena # yes
10472
10473fsglmul_ovfl_dis: # produce the default overflow result (also reached from fsglmul_ovfl_ena)
10474 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10475 sne %d1 # set sign param accordingly
10476 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10477 andi.b &0x30,%d0 # force prec = ext
10478 bsr.l ovf_res # calculate default result
10479 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10480 fmovm.x (%a0),&0x80 # return default result in fp0
10481 rts
10482
10483fsglmul_ovfl_ena: # OVFL or INEX enabled: build the EXOP in fp1 (d0 still holds the scale factor)
10484 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10485
10486 mov.l %d2,-(%sp) # save d2
10487 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10488 mov.l %d1,%d2 # make a copy
10489 andi.l &0x7fff,%d1 # strip sign
10490 sub.l %d0,%d1 # undo scaling: exp - scale factor
10491 subi.l &0x6000,%d1 # subtract bias
10492 andi.w &0x7fff,%d1 # keep 15-bit exponent field
10493 andi.w &0x8000,%d2 # keep old sign
10494 or.w %d2,%d1 # concat old sign,new exp
10495 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10496 mov.l (%sp)+,%d2 # restore d2
10497 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10498 bra.b fsglmul_ovfl_dis # then produce the default result in fp0
10499
10500fsglmul_may_ovfl: # borderline scale factor: multiply, then test the scaled result against 2
10501 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10502
10503 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10504 fmov.l &0x0,%fpsr # clear FPSR
10505
10506 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10507
10508 fmov.l %fpsr,%d1 # save status
10509 fmov.l &0x0,%fpcr # clear FPCR
10510
10511 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10512
10513 fabs.x %fp0,%fp1 # make a copy of result
10514 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10515 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
10516
10517# no, it didn't overflow; we have correct result
10518 bra.w fsglmul_normal_exit
10519
10520fsglmul_unfl: # result underflows: multiply in RZ mode, then build the default result
10521 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522
10523 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10524
10525 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10526 fmov.l &0x0,%fpsr # clear FPSR
10527
10528 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10529
10530 fmov.l %fpsr,%d1 # save status
10531 fmov.l &0x0,%fpcr # clear FPCR
10532
10533 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10534
10535 mov.b FPCR_ENABLE(%a6),%d1
10536 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10537 bne.b fsglmul_unfl_ena # yes
10538
10539fsglmul_unfl_dis: # also reached from fsglmul_unfl_ena once the EXOP is in fp1
10540 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10541
10542 lea FP_SCR0(%a6),%a0 # pass: result addr
10543 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10544 bsr.l unf_res4 # calculate default result
10545 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10546 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10547 rts
10548
10549#
10550# UNFL or INEX is enabled: redo the multiply into fp1 and return it as the EXOP.
10551#
10552fsglmul_unfl_ena:
10553 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10554
10555 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10556 fmov.l &0x0,%fpsr # clear FPSR
10557
10558 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10559
10560 fmov.l &0x0,%fpcr # clear FPCR
10561
10562 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10563 mov.l %d2,-(%sp) # save d2
10564 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10565 mov.l %d1,%d2 # make a copy
10566 andi.l &0x7fff,%d1 # strip sign
10567 andi.w &0x8000,%d2 # keep old sign
10568 sub.l %d0,%d1 # undo scaling: exp - scale factor
10569 addi.l &0x6000,%d1 # add bias
10570 andi.w &0x7fff,%d1 # keep 15-bit exponent field
10571 or.w %d2,%d1 # concat old sign,new exp
10572 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10573 mov.l (%sp)+,%d2 # restore d2
10574 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10575 bra.w fsglmul_unfl_dis # fp0 still holds the RZ result for the default path
10576
10577fsglmul_may_unfl: # borderline scale factor: multiply, then test the scaled result against 2
10578 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10579
10580 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10581 fmov.l &0x0,%fpsr # clear FPSR
10582
10583 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10584
10585 fmov.l %fpsr,%d1 # save status
10586 fmov.l &0x0,%fpcr # clear FPCR
10587
10588 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10589
10590 fabs.x %fp0,%fp1 # make a copy of result
10591 fcmp.b %fp1,&0x2 # compare |result| with 2.b
10592 fbgt.w fsglmul_normal_exit # |result| > 2; no underflow occurred
10593 fblt.w fsglmul_unfl # |result| < 2; underflow occurred
10594
10595#
10596# we still don't know if underflow occurred. result is ~ equal to 2. but,
10597# we don't know if the result was an underflow that rounded up to a 2 or
10598# a normalized number that rounded down to a 2. so, redo the entire operation
10599# using RZ as the rounding mode to see what the pre-rounded result is.
10600# this case should be relatively rare.
10601#
10602 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10603
10604 mov.l L_SCR3(%a6),%d1
10605 andi.b &0xc0,%d1 # keep rnd prec
10606 ori.b &rz_mode*0x10,%d1 # insert RZ
10607
10608 fmov.l %d1,%fpcr # set FPCR
10609 fmov.l &0x0,%fpsr # clear FPSR
10610
10611 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10612
10613 fmov.l &0x0,%fpcr # clear FPCR
10614 fabs.x %fp1 # make absolute value
10615 fcmp.b %fp1,&0x2 # is |result| < 2.b?
10616 fbge.w fsglmul_normal_exit # no; no underflow occurred
10617 bra.w fsglmul_unfl # yes, underflow occurred
10618
10619##############################################################################
10620
10621#
10622# Single Precision Multiply: inputs are not both normalized; what are they?
10623#
10624fsglmul_not_norm: # in: d1 = (dst tag << 3) | src tag
10625 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # fetch the 16-bit handler offset for this tag pair
10626 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1) # jump to tbl_fsglmul_op + offset
10627
10628 swbeg &48
10629tbl_fsglmul_op: # entries are "dst x src"; 8 slots per dst tag
10630 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
10631 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
10632 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
10633 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
10634 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
10635 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
10636 short tbl_fsglmul_op - tbl_fsglmul_op #
10637 short tbl_fsglmul_op - tbl_fsglmul_op #
10638
10639 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
10640 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
10641 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
10642 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
10643 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
10644 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
10645 short tbl_fsglmul_op - tbl_fsglmul_op #
10646 short tbl_fsglmul_op - tbl_fsglmul_op #
10647
10648 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
10649 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
10650 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
10651 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
10652 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
10653 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
10654 short tbl_fsglmul_op - tbl_fsglmul_op #
10655 short tbl_fsglmul_op - tbl_fsglmul_op #
10656
10657 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
10658 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
10659 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
10660 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
10661 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
10662 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
10663 short tbl_fsglmul_op - tbl_fsglmul_op #
10664 short tbl_fsglmul_op - tbl_fsglmul_op #
10665
10666 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
10667 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
10668 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
10669 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
10670 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
10671 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
10672 short tbl_fsglmul_op - tbl_fsglmul_op #
10673 short tbl_fsglmul_op - tbl_fsglmul_op #
10674
10675 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
10676 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
10677 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
10678 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
10679 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
10680 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
10681 short tbl_fsglmul_op - tbl_fsglmul_op #
10682 short tbl_fsglmul_op - tbl_fsglmul_op #
10683
10684fsglmul_res_operr: # local stubs: table offsets are 16-bit, so each case long-branches to the shared handler
10685 bra.l res_operr
10686fsglmul_res_snan:
10687 bra.l res_snan
10688fsglmul_res_qnan:
10689 bra.l res_qnan
10690fsglmul_zero:
10691 bra.l fmul_zero # reuse the fmul zero handler
10692fsglmul_inf_src:
10693 bra.l fmul_inf_src # reuse the fmul src-infinity handler
10694fsglmul_inf_dst:
10695 bra.l fmul_inf_dst # reuse the fmul dst-infinity handler
10696
10697#########################################################################
10698# XDEF **************************************************************** #
10699# fsgldiv(): emulates the fsgldiv instruction #
10700# #
10701# XREF **************************************************************** #
10702# scale_to_zero_src() - scale src exponent to zero #
10703# scale_to_zero_dst() - scale dst exponent to zero #
10704# unf_res4() - return default underflow result for sglop #
10705# ovf_res() - return default overflow result #
10706# res_qnan() - return QNAN result #
10707# res_snan() - return SNAN result #
10708# #
10709# INPUT *************************************************************** #
10710# a0 = pointer to extended precision source operand #
10711# a1 = pointer to extended precision destination operand #
10712# d0 rnd prec,mode #
10713# #
10714# OUTPUT ************************************************************** #
10715# fp0 = result #
10716# fp1 = EXOP (if exception occurred) #
10717# #
10718# ALGORITHM *********************************************************** #
10719# Handle NANs, infinities, and zeroes as special cases. Divide #
10720# norms/denorms into ext/sgl/dbl precision. #
10721# For norms/denorms, scale the exponents such that a divide #
10722# instruction won't cause an exception. Use the regular fsgldiv to #
10723# compute a result. Check if the regular operands would have taken #
10724# an exception. If so, return the default overflow/underflow result #
10725# and return the EXOP if exceptions are enabled. Else, scale the #
10726# result operand to the proper exponent. #
10727# #
10728#########################################################################
10729
10730 global fsgldiv
10731fsgldiv: # in: a0 = src ptr, a1 = dst ptr, d0 = rnd prec,mode
10732 mov.l %d0,L_SCR3(%a6) # store rnd info
10733
10734 clr.w %d1
10735 mov.b DTAG(%a6),%d1 # fetch dst tag
10736 lsl.b &0x3,%d1 # dst tag * 8 (8 table entries per dst tag)
10737 or.b STAG(%a6),%d1 # combine src tags
10738
10739 bne.w fsgldiv_not_norm # optimize on non-norm input
10740
10741#
10742# DIVIDE: NORMs and DENORMs ONLY!
10743#
10744fsgldiv_norm:
10745 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10746 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10747 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10748
10749 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10750 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10751 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10752
10753 bsr.l scale_to_zero_src # calculate scale factor 1
10754 mov.l %d0,-(%sp) # save scale factor 1
10755
10756 bsr.l scale_to_zero_dst # calculate scale factor 2
10757
10758 neg.l (%sp) # S.F. = scale2 - scale1
10759 add.l %d0,(%sp)
10760
10761 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
10762 lsr.b &0x6,%d1 # NOTE(review): every path below reloads d1 before reading it - value looks unused
10763 mov.l (%sp)+,%d0 # d0 = combined scale factor
10764 cmpi.l %d0,&0x3fff-0x7ffe # might result overflow?
10765 ble.w fsgldiv_may_ovfl # maybe; check after the divide
10766
10767 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
10768 beq.w fsgldiv_may_unfl # maybe
10769 bgt.w fsgldiv_unfl # yes; go handle underflow
10770
10771fsgldiv_normal: # no overflow/underflow possible: divide, then undo the scaling
10772 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10773
10774 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10775 fmov.l &0x0,%fpsr # clear FPSR
10776
10777 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
10778
10779 fmov.l %fpsr,%d1 # save FPSR
10780 fmov.l &0x0,%fpcr # clear FPCR
10781
10782 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10783
10784fsgldiv_normal_exit: # in: fp0 = scaled result, d0 = scale factor
10785 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
10786 mov.l %d2,-(%sp) # save d2
10787 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10788 mov.l %d1,%d2 # make a copy
10789 andi.l &0x7fff,%d1 # strip sign
10790 andi.w &0x8000,%d2 # keep old sign
10791 sub.l %d0,%d1 # undo scaling: exp - scale factor
10792 or.w %d2,%d1 # concat old sign,new exp
10793 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10794 mov.l (%sp)+,%d2 # restore d2
10795 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10796 rts
10797
10798fsgldiv_may_ovfl: # divide, then check the unscaled exponent; falls into fsgldiv_ovfl_tst on overflow
10799 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10800
10801 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10802 fmov.l &0x0,%fpsr # clear FPSR
10803
10804 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
10805
10806 fmov.l %fpsr,%d1 # save status
10807 fmov.l &0x0,%fpcr # clear FPCR
10808
10809 or.l %d1,USER_FPSR(%a6) # save INEX,N
10810
10811 fmovm.x &0x01,-(%sp) # save result to stack
10812 mov.w (%sp),%d1 # fetch new exponent
10813 add.l &0xc,%sp # clear result
10814 andi.l &0x7fff,%d1 # strip sign
10815 sub.l %d0,%d1 # undo scaling: exp - scale factor
10816 cmp.l %d1,&0x7fff # did divide overflow?
10817 blt.b fsgldiv_normal_exit # no; finish on the normal path
10818
10819fsgldiv_ovfl_tst: # overflow confirmed: flag it, then pick the enabled/disabled path
10820 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821
10822 mov.b FPCR_ENABLE(%a6),%d1
10823 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10824 bne.b fsgldiv_ovfl_ena # yes
10825
10826fsgldiv_ovfl_dis: # produce the default overflow result (also reached from fsgldiv_ovfl_ena)
10827 btst &neg_bit,FPSR_CC(%a6) # is result negative
10828 sne %d1 # set sign param accordingly
10829 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10830 andi.b &0x30,%d0 # kill precision
10831 bsr.l ovf_res # calculate default result
10832 or.b %d0,FPSR_CC(%a6) # set INF if applicable
10833 fmovm.x (%a0),&0x80 # return default result in fp0
10834 rts
10835
10836fsgldiv_ovfl_ena: # OVFL or INEX enabled: build the EXOP in fp1 (d0 still holds the scale factor)
10837 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10838
10839 mov.l %d2,-(%sp) # save d2
10840 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10841 mov.l %d1,%d2 # make a copy
10842 andi.l &0x7fff,%d1 # strip sign
10843 andi.w &0x8000,%d2 # keep old sign
10844 sub.l %d0,%d1 # undo scaling: exp - scale factor
10845 subi.l &0x6000,%d1 # subtract new bias
10846 andi.w &0x7fff,%d1 # clear ms bit
10847 or.w %d2,%d1 # concat old sign,new exp
10848 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10849 mov.l (%sp)+,%d2 # restore d2
10850 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10851 bra.b fsgldiv_ovfl_dis # then produce the default result in fp0
10852
10853fsgldiv_unfl: # result underflows: divide in RZ mode, then build the default result
10854 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855
10856 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10857
10858 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10859 fmov.l &0x0,%fpsr # clear FPSR
10860
10861 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10862
10863 fmov.l %fpsr,%d1 # save status
10864 fmov.l &0x0,%fpcr # clear FPCR
10865
10866 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10867
10868 mov.b FPCR_ENABLE(%a6),%d1
10869 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10870 bne.b fsgldiv_unfl_ena # yes
10871
10872fsgldiv_unfl_dis: # also reached from fsgldiv_unfl_ena once the EXOP is in fp1
10873 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10874
10875 lea FP_SCR0(%a6),%a0 # pass: result addr
10876 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10877 bsr.l unf_res4 # calculate default result
10878 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10879 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10880 rts
10881
10882#
10883# UNFL or INEX is enabled: redo the divide into fp1 and return it as the EXOP.
10884#
10885fsgldiv_unfl_ena:
10886 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10887
10888 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10889 fmov.l &0x0,%fpsr # clear FPSR
10890
10891 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10892
10893 fmov.l &0x0,%fpcr # clear FPCR
10894
10895 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10896 mov.l %d2,-(%sp) # save d2
10897 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10898 mov.l %d1,%d2 # make a copy
10899 andi.l &0x7fff,%d1 # strip sign
10900 andi.w &0x8000,%d2 # keep old sign
10901 sub.l %d0,%d1 # undo scaling: exp - scale factor
10902 addi.l &0x6000,%d1 # add bias
10903 andi.w &0x7fff,%d1 # clear top bit
10904 or.w %d2,%d1 # concat old sign, new exp
10905 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10906 mov.l (%sp)+,%d2 # restore d2
10907 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10908 bra.b fsgldiv_unfl_dis # fp0 still holds the RZ result for the default path
10909
10910#
10911# the divide operation MAY underflow:
10912#
10913fsgldiv_may_unfl: # divide, then test the scaled result against 1
10914 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10915
10916 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10917 fmov.l &0x0,%fpsr # clear FPSR
10918
10919 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10920
10921 fmov.l %fpsr,%d1 # save status
10922 fmov.l &0x0,%fpcr # clear FPCR
10923
10924 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10925
10926 fabs.x %fp0,%fp1 # make a copy of result
10927 fcmp.b %fp1,&0x1 # compare |result| with 1.b
10928 fbgt.w fsgldiv_normal_exit # |result| > 1; no underflow occurred
10929 fblt.w fsgldiv_unfl # |result| < 1; underflow occurred
10930
10931#
10932# we still don't know if underflow occurred. result is ~ equal to 1. but,
10933# we don't know if the result was an underflow that rounded up to a 1
10934# or a normalized number that rounded down to a 1. so, redo the entire
10935# operation using RZ as the rounding mode to see what the pre-rounded
10936# result is. this case should be relatively rare.
10937#
10938 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
10939
10940 clr.l %d1 # clear scratch register
10941 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
10942
10943 fmov.l %d1,%fpcr # set FPCR
10944 fmov.l &0x0,%fpsr # clear FPSR
10945
10946 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10947
10948 fmov.l &0x0,%fpcr # clear FPCR
10949 fabs.x %fp1 # make absolute value
10950 fcmp.b %fp1,&0x1 # is |result| < 1.b?
10951 fbge.w fsgldiv_normal_exit # no; no underflow occurred
10952 bra.w fsgldiv_unfl # yes; underflow occurred
10953
10954############################################################################
10955
10956#
10957# Divide: inputs are not both normalized; what are they?
10958#
10959fsgldiv_not_norm: # in: d1 = (dst tag << 3) | src tag
10960 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 # fetch the 16-bit handler offset for this tag pair
10961 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1) # jump to tbl_fsgldiv_op + offset
10962
10963 swbeg &48
10964tbl_fsgldiv_op: # entries are "dst / src"; 8 slots per dst tag
10965 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
10966 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
10967 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
10968 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
10969 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
10970 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
10971 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10972 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10973
10974 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
10975 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
10976 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
10977 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
10978 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
10979 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
10980 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10981 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10982
10983 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
10984 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
10985 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
10986 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
10987 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
10988 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
10989 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10990 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10991
10992 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
10993 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
10994 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
10995 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
10996 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
10997 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
10998 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10999 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11000
11001 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
11002 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
11003 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
11004 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
11005 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
11006 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
11007 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11008 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11009
11010 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
11011 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
11012 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
11013 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
11014 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
11015 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
11016 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11017 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11018
11019fsgldiv_res_qnan: # local stubs: table offsets are 16-bit, so each case long-branches to the shared handler
11020 bra.l res_qnan
11021fsgldiv_res_snan:
11022 bra.l res_snan
11023fsgldiv_res_operr:
11024 bra.l res_operr
11025fsgldiv_inf_load:
11026 bra.l fdiv_inf_load # reuse the fdiv handler
11027fsgldiv_zero_load:
11028 bra.l fdiv_zero_load # reuse the fdiv handler
11029fsgldiv_inf_dst:
11030 bra.l fdiv_inf_dst # reuse the fdiv handler
11031
11032#########################################################################
11033# XDEF **************************************************************** #
11034# fadd(): emulates the fadd instruction #
11035# fsadd(): emulates the fsadd instruction #
11036# fdadd(): emulates the fdadd instruction #
11037# #
11038# XREF **************************************************************** #
11039# addsub_scaler2() - scale the operands so they won't take exc #
11040# ovf_res() - return default overflow result #
11041# unf_res() - return default underflow result #
11042# res_qnan() - set QNAN result #
11043# res_snan() - set SNAN result #
11044# res_operr() - set OPERR result #
11045# scale_to_zero_src() - set src operand exponent equal to zero #
11046# scale_to_zero_dst() - set dst operand exponent equal to zero #
11047# #
11048# INPUT *************************************************************** #
11049# a0 = pointer to extended precision source operand #
11050# a1 = pointer to extended precision destination operand #
11051# #
11052# OUTPUT ************************************************************** #
11053# fp0 = result #
11054# fp1 = EXOP (if exception occurred) #
11055# #
11056# ALGORITHM *********************************************************** #
11057# Handle NANs, infinities, and zeroes as special cases. Divide #
11058# norms into extended, single, and double precision. #
11059# Do addition after scaling exponents such that exception won't #
11060# occur. Then, check result exponent to see if exception would have #
11061# occurred. If so, return default result and maybe EXOP. Else, insert #
11062# the correct result exponent and return. Set FPSR bits as appropriate. #
11063# #
11064#########################################################################
11065
11066 global fsadd
11067fsadd: # single-precision entry: rewrite the precision field in d0, then share fadd
11068 andi.b &0x30,%d0 # clear rnd prec
11069 ori.b &s_mode*0x10,%d0 # insert sgl prec
11070 bra.b fadd
11071
11072 global fdadd
11073fdadd: # double-precision entry: rewrite the precision field in d0, then fall into fadd
11074 andi.b &0x30,%d0 # clear rnd prec
11075 ori.b &d_mode*0x10,%d0 # insert dbl prec
11076
11077 global fadd
11078fadd: # in: a0 = src ptr, a1 = dst ptr, d0 = rnd prec,mode
11079 mov.l %d0,L_SCR3(%a6) # store rnd info
11080
11081 clr.w %d1
11082 mov.b DTAG(%a6),%d1 # fetch dst tag
11083 lsl.b &0x3,%d1 # dst tag * 8
11084 or.b STAG(%a6),%d1 # combine src tags
11085
11086 bne.w fadd_not_norm # optimize on non-norm input
11087
11088#
11089# ADD: norms and denorms
11090#
11091fadd_norm: # add scaled operands, then classify the unscaled exponent
11092 bsr.l addsub_scaler2 # scale exponents
11093
11094fadd_zero_entry:
11095 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11096
11097 fmov.l &0x0,%fpsr # clear FPSR
11098 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11099
11100 fadd.x FP_SCR0(%a6),%fp0 # execute add
11101
11102 fmov.l &0x0,%fpcr # clear FPCR
11103 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11104
11105 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11106
11107 fbeq.w fadd_zero_exit # if result is zero, end now
11108
11109 mov.l %d2,-(%sp) # save d2
11110
11111 fmovm.x &0x01,-(%sp) # save result to stack
11112
11113 mov.w 2+L_SCR3(%a6),%d1 # fetch the low word of the rnd info
11114 lsr.b &0x6,%d1 # d1 = rnd prec field, used as table index
11115
11116 mov.w (%sp),%d2 # fetch new sign, exp
11117 andi.l &0x7fff,%d2 # strip sign
11118 sub.l %d0,%d2 # undo scaling: exp - scale factor
11119
11120 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121 bge.b fadd_ovfl # yes
11122
11123 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124 blt.w fadd_unfl # yes
11125 beq.w fadd_may_unfl # maybe; go find out
11126
11127fadd_normal: # in-range result: patch the exponent in the stacked copy
11128 mov.w (%sp),%d1
11129 andi.w &0x8000,%d1 # keep sign
11130 or.w %d2,%d1 # concat sign,new exp
11131 mov.w %d1,(%sp) # insert new exponent
11132
11133 fmovm.x (%sp)+,&0x80 # return result in fp0
11134
11135 mov.l (%sp)+,%d2 # restore d2
11136 rts
11137
11138fadd_zero_exit: # zero result: fp0 already holds it, nothing was pushed
11139# fmov.s &0x00000000,%fp0 # return zero in fp0
11140 rts
11141
11142tbl_fadd_ovfl: # exponent at or above which the result overflows, indexed by rnd prec
11143 long 0x7fff # ext ovfl
11144 long 0x407f # sgl ovfl
11145 long 0x43ff # dbl ovfl
11146
11147tbl_fadd_unfl: # exponent below which the result underflows (equal = maybe), indexed by rnd prec
11148 long 0x0000 # ext unfl
11149 long 0x3f81 # sgl unfl
11150 long 0x3c01 # dbl unfl
11151
11152fadd_ovfl: # on entry the stack holds the 12-byte result on top of the saved d2
11153 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154
11155 mov.b FPCR_ENABLE(%a6),%d1
11156 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11157 bne.b fadd_ovfl_ena # yes
11158
11159 add.l &0xc,%sp # discard the stacked result
11160fadd_ovfl_dis: # produce the default overflow result; only saved d2 remains on the stack
11161 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11162 sne %d1 # set sign param accordingly
11163 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11164 bsr.l ovf_res # calculate default result
11165 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11166 fmovm.x (%a0),&0x80 # return default result in fp0
11167 mov.l (%sp)+,%d2 # restore d2
11168 rts
11169
11170fadd_ovfl_ena: # OVFL or INEX enabled: build the EXOP in fp1
11171 mov.b L_SCR3(%a6),%d1
11172 andi.b &0xc0,%d1 # is precision extended?
11173 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11174
11175fadd_ovfl_ena_cont: # in: (sp) = result to return as EXOP, d2 = unscaled exponent
11176 mov.w (%sp),%d1
11177 andi.w &0x8000,%d1 # keep sign
11178 subi.l &0x6000,%d2 # subtract extra bias
11179 andi.w &0x7fff,%d2 # keep 15-bit exponent field
11180 or.w %d2,%d1 # concat sign,new exp
11181 mov.w %d1,(%sp) # insert new exponent
11182
11183 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11184 bra.b fadd_ovfl_dis # then produce the default result in fp0
11185
11186fadd_ovfl_ena_sd: # sgl/dbl: redo the add with the precision bits cleared and restack it
11187 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11188
11189 mov.l L_SCR3(%a6),%d1
11190 andi.b &0x30,%d1 # keep rnd mode
11191 fmov.l %d1,%fpcr # set FPCR
11192
11193 fadd.x FP_SCR0(%a6),%fp0 # execute add
11194
11195 fmov.l &0x0,%fpcr # clear FPCR
11196
11197 add.l &0xc,%sp # drop the previously stacked result
11198 fmovm.x &0x01,-(%sp) # stack the recomputed result in its place
11199 bra.b fadd_ovfl_ena_cont
11200
# fadd_unfl: underflow path.
# Set the UNFL exception bit, discard the 12-byte result image on the stack,
# and redo the add with FPCR = round-to-zero only. The resulting FPSR is
# OR-ed into USER_FPSR. If any exception selected by mask 0x0b in
# FPCR_ENABLE is enabled, go build the EXOP (fadd_unfl_ena) first.
#
# fadd_unfl_dis: store fp0 to FP_SCR0 and let unf_res() turn it into the
# default underflow result in place.
#   passes: a0 = &FP_SCR0, d1 = rnd prec,mode
#   uses:   d0 = ccode bits to OR into FPSR_CC
# The saved d2 is popped before returning.
11201fadd_unfl:
11202 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203
11204 add.l &0xc,%sp # discard stacked result (12 bytes)
11205
11206 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11207
11208 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11209 fmov.l &0x0,%fpsr # clear FPSR
11210
11211 fadd.x FP_SCR0(%a6),%fp0 # execute add
11212
11213 fmov.l &0x0,%fpcr # clear FPCR
11214 fmov.l %fpsr,%d1 # save status
11215
11216 or.l %d1,USER_FPSR(%a6) # save INEX,N
11217
11218 mov.b FPCR_ENABLE(%a6),%d1
11219 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11220 bne.b fadd_unfl_ena # yes
11221
11222fadd_unfl_dis:
11223 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11224
11225 lea FP_SCR0(%a6),%a0 # pass: result addr
11226 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11227 bsr.l unf_res # calculate default result
11228 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11229 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11230 mov.l (%sp)+,%d2 # restore d2
11231 rts
11232
# fadd_unfl_ena: underflow with UNFL or INEX enabled -> build the EXOP.
# The add is redone into fp1. For extended precision FPCR is loaded with
# L_SCR3 as is; for sgl/dbl (fadd_unfl_ena_sd) only the rounding-mode bits
# are kept.
#
# fadd_unfl_ena_cont: the fp1 result is written to FP_SCR0, its exponent is
# adjusted (minus the scale factor in d0, plus 0x6000, masked to 15 bits)
# with the sign preserved, and the value is reloaded into fp1 as the EXOP.
# d2 is used as scratch here; the caller's d2 is still saved on the stack
# and is restored by fadd_unfl_dis. Note fadd_unfl_dis then overwrites
# FP_SCR0 with fp0.
11233fadd_unfl_ena:
11234 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11235
11236 mov.l L_SCR3(%a6),%d1
11237 andi.b &0xc0,%d1 # is precision extended?
11238 bne.b fadd_unfl_ena_sd # no; sgl or dbl
11239
11240 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11241
11242fadd_unfl_ena_cont:
11243 fmov.l &0x0,%fpsr # clear FPSR
11244
11245 fadd.x FP_SCR0(%a6),%fp1 # execute add
11246
11247 fmov.l &0x0,%fpcr # clear FPCR
11248
11249 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11250 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11251 mov.l %d1,%d2 # make a copy
11252 andi.l &0x7fff,%d1 # strip sign
11253 andi.w &0x8000,%d2 # keep old sign
11254 sub.l %d0,%d1 # add scale factor
11255 addi.l &0x6000,%d1 # add new bias
11256 andi.w &0x7fff,%d1 # clear top bit
11257 or.w %d2,%d1 # concat sign,new exp
11258 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11259 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11260 bra.w fadd_unfl_dis
11261
11262fadd_unfl_ena_sd:
11263 mov.l L_SCR3(%a6),%d1
11264 andi.b &0x30,%d1 # use only rnd mode
11265 fmov.l %d1,%fpcr # set FPCR
11266
11267 bra.b fadd_unfl_ena_cont
11268
11269#
11270# result is equal to the smallest normalized number in the selected precision
11271# if the precision is extended, this result could not have come from an
11272# underflow that rounded up.
11273#
# On entry the 12-byte result image is on top of the stack: exponent word at
# (%sp), hi(man) at 0x4(%sp), lo(man) at 0x8(%sp). Any test that rules out
# an underflow branches to fadd_normal.
11274fadd_may_unfl:
11275 mov.l L_SCR3(%a6),%d1
11276 andi.b &0xc0,%d1 # is precision extended?
11277 beq.w fadd_normal # yes; no underflow occurred
11278
11279 mov.l 0x4(%sp),%d1 # extract hi(man)
11280 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11281 bne.w fadd_normal # no; no underflow occurred
11282
11283 tst.l 0x8(%sp) # is lo(man) = 0x0?
11284 bne.w fadd_normal # no; no underflow occurred
11285
11286 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287 beq.w fadd_normal # no; no underflow occurred
11288
11289#
11290# ok, so now the result has an exponent equal to the smallest normalized
11291# exponent for the selected precision. also, the mantissa is equal to
11292# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293# g,r,s.
11294# now, we must determine whether the pre-rounded result was an underflow
11295# rounded "up" or a normalized number rounded "down".
11296# so, we do this by re-executing the add using RZ as the rounding mode and
11297# seeing if the new result is smaller or equal to the current result.
11298#
11299 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11300
11301 mov.l L_SCR3(%a6),%d1
11302 andi.b &0xc0,%d1 # keep rnd prec
11303 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11304 fmov.l %d1,%fpcr # set FPCR
11305 fmov.l &0x0,%fpsr # clear FPSR
11306
11307 fadd.x FP_SCR0(%a6),%fp1 # execute add
11308
11309 fmov.l &0x0,%fpcr # clear FPCR
11310
# fp0 is overwritten with its absolute value here; both exits reload fp0
# (fadd_normal from the stacked image, fadd_unfl by redoing the add).
11311 fabs.x %fp0 # compare absolute values
11312 fabs.x %fp1
11313 fcmp.x %fp0,%fp1 # is first result > second?
11314
11315 fbgt.w fadd_unfl # yes; it's an underflow
11316 bra.w fadd_normal # no; it's not an underflow
11317
11318##########################################################################
11319
11320#
11321# Add: inputs are not both normalized; what are they?
11322#
# d1 holds the operand-tag index on entry. Fetch the 16-bit offset stored at
# tbl_fadd_op[d1] and jump to tbl_fadd_op + offset.
11323fadd_not_norm:
11324 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset
11325 jmp (tbl_fadd_op.b,%pc,%d1.w*1) # dispatch to handler
11326
# Dispatch table for fadd_not_norm: 48 16-bit offsets relative to
# tbl_fadd_op, laid out as 6 rows of 8. Per the entry comments a row is the
# destination operand type and a column the source operand type, written
# "DST + SRC". The last two slots of each row hold offset 0 (no handler).
11327 swbeg &48
11328tbl_fadd_op:
11329 short fadd_norm - tbl_fadd_op # NORM + NORM
11330 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
11331 short fadd_inf_src - tbl_fadd_op # NORM + INF
11332 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
11333 short fadd_norm - tbl_fadd_op # NORM + DENORM
11334 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
11335 short tbl_fadd_op - tbl_fadd_op #
11336 short tbl_fadd_op - tbl_fadd_op #
11337
11338 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
11339 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
11340 short fadd_inf_src - tbl_fadd_op # ZERO + INF
11341 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
11342 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
11343 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
11344 short tbl_fadd_op - tbl_fadd_op #
11345 short tbl_fadd_op - tbl_fadd_op #
11346
11347 short fadd_inf_dst - tbl_fadd_op # INF + NORM
11348 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
11349 short fadd_inf_2 - tbl_fadd_op # INF + INF
11350 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
11351 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
11352 short fadd_res_snan - tbl_fadd_op # INF + SNAN
11353 short tbl_fadd_op - tbl_fadd_op #
11354 short tbl_fadd_op - tbl_fadd_op #
11355
11356 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
11357 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
11358 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
11359 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
11360 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
11361 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
11362 short tbl_fadd_op - tbl_fadd_op #
11363 short tbl_fadd_op - tbl_fadd_op #
11364
11365 short fadd_norm - tbl_fadd_op # DENORM + NORM
11366 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
11367 short fadd_inf_src - tbl_fadd_op # DENORM + INF
11368 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
11369 short fadd_norm - tbl_fadd_op # DENORM + DENORM
11370 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
11371 short tbl_fadd_op - tbl_fadd_op #
11372 short tbl_fadd_op - tbl_fadd_op #
11373
11374 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
11375 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
11376 short fadd_res_snan - tbl_fadd_op # SNAN + INF
11377 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
11378 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
11379 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
11380 short tbl_fadd_op - tbl_fadd_op #
11381 short tbl_fadd_op - tbl_fadd_op #
11382
# Local trampolines: the dispatch table stores 16-bit offsets, so the NAN
# cases land here and take a long branch to the shared handlers.
11383fadd_res_qnan:
11384 bra.l res_qnan
11385fadd_res_snan:
11386 bra.l res_snan
11387
11388#
11389# both operands are ZEROes
11390#
# d0 = sign byte of src, d1 = sign byte of dst. The eor leaves the XOR of
# the two in d1 and keeps d0 intact, so d0 can still be tested for the
# common sign afterwards.
11391fadd_zero_2:
11392 mov.b SRC_EX(%a0),%d0 # are the signs opposite
11393 mov.b DST_EX(%a1),%d1
11394 eor.b %d0,%d1 # d1 = src sign ^ dst sign
11395 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11396
11397# the signs are the same. so determine whether they are positive or negative
11398# and return the appropriately signed zero.
11399 tst.b %d0 # are ZEROes positive or negative?
11400 bmi.b fadd_zero_rm # negative
11401 fmov.s &0x00000000,%fp0 # return +ZERO
11402 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11403 rts
11404
11405#
11406# the ZEROes have opposite signs:
11407# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408# - -ZERO is returned in the case of RM.
11409#
11410fadd_zero_2_chk_rm:
11411 mov.b 3+L_SCR3(%a6),%d1 # fetch low byte of rnd info
11412 andi.b &0x30,%d1 # extract rnd mode
11413 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11414 beq.b fadd_zero_rm # yes
11415 fmov.s &0x00000000,%fp0 # return +ZERO
11416 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11417 rts
11418
# fadd_zero_rm: shared exit that returns -ZERO with the N and Z ccodes.
11419fadd_zero_rm:
11420 fmov.s &0x80000000,%fp0 # return -ZERO
11421 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422 rts
11423
11424#
11425# one operand is a ZERO and the other is a DENORM or NORM. scale
11426# the DENORM or NORM and jump to the regular fadd routine.
11427#
# fadd_zero_dst: dst is the ZERO. Copy the src operand into FP_SCR0, scale
# it with scale_to_zero_src(), clear FP_SCR1 (the dst slot) to +0 and enter
# the common add path.
11428fadd_zero_dst:
11429 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11430 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11431 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11432 bsr.l scale_to_zero_src # scale the operand
11433 clr.w FP_SCR1_EX(%a6)
11434 clr.l FP_SCR1_HI(%a6)
11435 clr.l FP_SCR1_LO(%a6)
11436 bra.w fadd_zero_entry # go execute fadd
11437
# fadd_zero_src: src is the ZERO. Copy the dst operand into FP_SCR1, scale
# it with scale_to_zero_dst(), clear FP_SCR0 (the src slot) to +0 and enter
# the common add path.
11438fadd_zero_src:
11439 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11440 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11441 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11442 bsr.l scale_to_zero_dst # scale the operand
11443 clr.w FP_SCR0_EX(%a6)
11444 clr.l FP_SCR0_HI(%a6)
11445 clr.l FP_SCR0_LO(%a6)
11446 bra.w fadd_zero_entry # go execute fadd
11447
11448#
11449# both operands are INFs. an OPERR will result if the INFs have
11450# different signs. else, an INF of the same sign is returned
11451#
11452fadd_inf_2:
11453 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11454 mov.b DST_EX(%a1),%d1
11455 eor.b %d1,%d0
11456 bmi.l res_operr # weed out (-INF)+(+INF)
11457
11458# ok, so it's not an OPERR. but, we do have to remember to return the
11459# src INF since that's where the 881/882 gets the j-bit from...
# (execution falls through into fadd_inf_src)
11460
11461#
11462# src operand is INF; dst is one of {ZERO, INF, DENORM, NORM}
11463#
11464fadd_inf_src:
11465 fmovm.x SRC(%a0),&0x80 # return src INF
11466 tst.b SRC_EX(%a0) # is INF positive?
11467 bpl.b fadd_inf_done # yes; we're done
11468 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469 rts
11470
11471#
11472# dst operand is INF; src is one of {ZERO, DENORM, NORM}
11473#
11474fadd_inf_dst:
11475 fmovm.x DST(%a1),&0x80 # return dst INF
11476 tst.b DST_EX(%a1) # is INF positive?
11477 bpl.b fadd_inf_done # yes; we're done
11478 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479 rts
11480
# fadd_inf_done: shared exit for a positive INF result already in fp0.
11481fadd_inf_done:
11482 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11483 rts
11484
11485#########################################################################
11486# XDEF **************************************************************** #
11487# fsub(): emulates the fsub instruction #
11488# fssub(): emulates the fssub instruction #
11489# fdsub(): emulates the fdsub instruction #
11490# #
11491# XREF **************************************************************** #
11492# addsub_scaler2() - scale the operands so they won't take exc #
11493# ovf_res() - return default overflow result #
11494# unf_res() - return default underflow result #
11495# res_qnan() - set QNAN result #
11496# res_snan() - set SNAN result #
11497# res_operr() - set OPERR result #
11498# scale_to_zero_src() - set src operand exponent equal to zero #
11499# scale_to_zero_dst() - set dst operand exponent equal to zero #
11500# #
11501# INPUT *************************************************************** #
11502# a0 = pointer to extended precision source operand #
11503# a1 = pointer to extended precision destination operand #
# d0 = rnd prec,mode #
11504# #
11505# OUTPUT ************************************************************** #
11506# fp0 = result #
11507# fp1 = EXOP (if exception occurred) #
11508# #
11509# ALGORITHM *********************************************************** #
11510# Handle NANs, infinities, and zeroes as special cases. Divide #
11511# norms into extended, single, and double precision. #
11512# Do subtraction after scaling exponents such that exception won't#
11513# occur. Then, check result exponent to see if exception would have #
11514# occurred. If so, return default result and maybe EXOP. Else, insert #
11515# the correct result exponent and return. Set FPSR bits as appropriate. #
11516# #
11517#########################################################################
11518
# fssub: force single rounding precision in the rnd info byte of d0 (the
# caller's rounding mode bits are kept), then run the common fsub code.
11519 global fssub
11520fssub:
11521 andi.b &0x30,%d0 # clear rnd prec
11522 ori.b &s_mode*0x10,%d0 # insert sgl prec
11523 bra.b fsub
11524
# fdsub: force double rounding precision in the rnd info byte of d0 (the
# caller's rounding mode bits are kept), then fall through into fsub.
11525 global fdsub
11526fdsub:
11527 andi.b &0x30,%d0 # clear rnd prec
11528 ori.b &d_mode*0x10,%d0 # insert dbl prec
11529
# fsub: common entry. d0 = rnd prec,mode; it is saved in L_SCR3 for later.
# Builds the dispatch index d1 = (DTAG << 3) | STAG. A zero index (both
# operands tagged 0) falls through to fsub_norm; anything else is dispatched
# through tbl_fsub_op by fsub_not_norm.
11530 global fsub
11531fsub:
11532 mov.l %d0,L_SCR3(%a6) # store rnd info
11533
11534 clr.w %d1
11535 mov.b DTAG(%a6),%d1
11536 lsl.b &0x3,%d1
11537 or.b STAG(%a6),%d1 # combine src tags
11538
11539 bne.w fsub_not_norm # optimize on non-norm input
11540
11541#
11542# SUB: norms and denorms
11543#
# addsub_scaler2() presumably leaves the scaled operands in FP_SCR0 (src)
# and FP_SCR1 (dst) and the scale factor in d0 -- it is defined elsewhere;
# the code below consumes exactly those three things.
11544fsub_norm:
11545 bsr.l addsub_scaler2 # scale exponents
11546
# fsub_zero_entry: common subtract path, also entered from fsub_zero_dst
# and fsub_zero_src. Computes fp0 = FP_SCR1 - FP_SCR0 under the user's
# rounding control and merges the resulting FPSR into USER_FPSR. A zero
# result returns at once. Otherwise d2 is saved, the result image is pushed,
# and the result exponent (sign stripped, scale factor d0 removed) is
# compared against the per-precision overflow and underflow thresholds.
11547fsub_zero_entry:
11548 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11549
11550 fmov.l &0x0,%fpsr # clear FPSR
11551 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11552
11553 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11554
11555 fmov.l &0x0,%fpcr # clear FPCR
11556 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11557
11558 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11559
11560 fbeq.w fsub_zero_exit # if result zero, end now
11561
11562 mov.l %d2,-(%sp) # save d2
11563
11564 fmovm.x &0x01,-(%sp) # save result to stack
11565
11566 mov.w 2+L_SCR3(%a6),%d1 # fetch low word of rnd info
11567 lsr.b &0x6,%d1 # d1 = rnd prec = table index
11568
11569 mov.w (%sp),%d2 # fetch new exponent
11570 andi.l &0x7fff,%d2 # strip sign
11571 sub.l %d0,%d2 # add scale factor
11572
11573 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574 bge.b fsub_ovfl # yes
11575
11576 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577 blt.w fsub_unfl # yes
11578 beq.w fsub_may_unfl # maybe; go find out
# otherwise execution falls through into fsub_normal
11579
# fsub_normal: neither overflow nor underflow.
# The 12-byte result image is on top of the stack with the saved d2 beneath
# it. Splice the exponent held in d2 into the image (sign bit preserved),
# pop the image into fp0, then restore d2 and return.
11580fsub_normal:
11581 mov.w (%sp),%d1 # fetch {sgn,exp} of stacked result
11582 andi.w &0x8000,%d1 # keep sign
11583 or.w %d2,%d1 # concat sign,new exp
11584 mov.w %d1,(%sp) # insert new exponent
11585
11586 fmovm.x (%sp)+,&0x80 # return result in fp0
11587
11588 mov.l (%sp)+,%d2 # restore d2
11589 rts
11590
# fsub_zero_exit: the subtract produced a zero (fbeq in fsub_zero_entry);
# fp0 already holds it and nothing has been pushed yet, so just return.
11591fsub_zero_exit:
11592# fmov.s &0x00000000,%fp0 # return zero in fp0
11593 rts
11594
# Exponent thresholds, one longword per rounding precision (ext, sgl, dbl),
# indexed with %d1.w*4 by fsub_zero_entry and compared against the rescaled
# result exponent in d2. At or above the tbl_fsub_ovfl entry is an overflow;
# below the tbl_fsub_unfl entry is an underflow, equal to it is a "maybe".
11595tbl_fsub_ovfl:
11596 long 0x7fff # ext ovfl
11597 long 0x407f # sgl ovfl
11598 long 0x43ff # dbl ovfl
11599
11600tbl_fsub_unfl:
11601 long 0x0000 # ext unfl
11602 long 0x3f81 # sgl unfl
11603 long 0x3c01 # dbl unfl
11604
# fsub_ovfl: overflow path.
# Record ovfl/aovfl/ainex in USER_FPSR. If any exception selected by mask
# 0x13 in FPCR_ENABLE is enabled, go build the EXOP first. Otherwise drop the
# 12-byte result image from the stack and fall into fsub_ovfl_dis.
#
# fsub_ovfl_dis: fetch the default overflow result from ovf_res().
#   passes: d1 = sign flag (0xff if the N ccode is set), d0 = prec:rnd
#   uses:   d0 = ccode bits to OR into FPSR_CC, (a0) = result to load in fp0
# The saved d2 is popped before returning.
11605fsub_ovfl:
11606 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607
11608 mov.b FPCR_ENABLE(%a6),%d1
11609 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11610 bne.b fsub_ovfl_ena # yes
11611
11612 add.l &0xc,%sp # discard stacked result (12 bytes)
11613fsub_ovfl_dis:
11614 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11615 sne %d1 # set sign param accordingly
11616 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11617 bsr.l ovf_res # calculate default result
11618 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11619 fmovm.x (%a0),&0x80 # return default result in fp0
11620 mov.l (%sp)+,%d2 # restore d2
11621 rts
11622
# fsub_ovfl_ena: overflow with OVFL or INEX enabled -> build the EXOP.
# For extended precision the result image already on the stack is used as
# is; for sgl/dbl the subtract is first redone with only the rounding mode
# set (fsub_ovfl_ena_sd), which pushes a fresh image and re-enters at
# fsub_ovfl_ena_cont.
#
# The rnd prec/mode byte is the low-order byte of the longword fsub stored
# at L_SCR3 (other code here reads it via 2+L_SCR3/3+L_SCR3 or a full
# mov.l). On this big-endian machine a byte read at offset 0 fetches the
# high-order byte instead, so the sgl/dbl test must read offset 3.
11623fsub_ovfl_ena:
11624 mov.b 3+L_SCR3(%a6),%d1 # fetch rnd prec,mode byte
11625 andi.b &0xc0,%d1 # is precision extended?
11626 bne.b fsub_ovfl_ena_sd # no
11627
# fsub_ovfl_ena_cont: rebias the exponent in d2 by -0x6000 (mod 0x8000),
# splice it into the stacked image keeping the sign, pop the image into fp1
# as the EXOP, then continue with the default-result path.
11628fsub_ovfl_ena_cont:
11629 mov.w (%sp),%d1 # fetch {sgn,exp}
11630 andi.w &0x8000,%d1 # keep sign
11631 subi.l &0x6000,%d2 # subtract new bias
11632 andi.w &0x7fff,%d2 # clear top bit
11633 or.w %d2,%d1 # concat sign,exp
11634 mov.w %d1,(%sp) # insert new exponent
11635
11636 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11637 bra.b fsub_ovfl_dis
11638
# fsub_ovfl_ena_sd: sgl/dbl precision with overflow enabled.
# Redo the subtract with FPCR holding only the rounding-mode bits of L_SCR3
# (precision bits cleared), then replace the 12-byte result image on the
# stack with the new fp0 and rejoin fsub_ovfl_ena_cont.
11639fsub_ovfl_ena_sd:
11640 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11641
11642 mov.l L_SCR3(%a6),%d1
11643 andi.b &0x30,%d1 # clear rnd prec
11644 fmov.l %d1,%fpcr # set FPCR
11645
11646 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11647
11648 fmov.l &0x0,%fpcr # clear FPCR
11649
11650 add.l &0xc,%sp # drop old result image
11651 fmovm.x &0x01,-(%sp) # push new result (fp0)
11652 bra.b fsub_ovfl_ena_cont
11653
# fsub_unfl: underflow path.
# Set the UNFL exception bit, discard the 12-byte result image on the stack,
# and redo the subtract with FPCR = round-to-zero only. The resulting FPSR
# is OR-ed into USER_FPSR. If any exception selected by mask 0x0b in
# FPCR_ENABLE is enabled, go build the EXOP (fsub_unfl_ena) first.
#
# fsub_unfl_dis: store fp0 to FP_SCR0 and let unf_res() turn it into the
# default underflow result in place.
#   passes: a0 = &FP_SCR0, d1 = rnd prec,mode
#   uses:   d0 = ccode bits to OR into FPSR_CC
# The saved d2 is popped before returning.
11654fsub_unfl:
11655 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656
11657 add.l &0xc,%sp # discard stacked result (12 bytes)
11658
11659 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11660
11661 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11662 fmov.l &0x0,%fpsr # clear FPSR
11663
11664 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11665
11666 fmov.l &0x0,%fpcr # clear FPCR
11667 fmov.l %fpsr,%d1 # save status
11668
11669 or.l %d1,USER_FPSR(%a6) # merge status into USER_FPSR
11670
11671 mov.b FPCR_ENABLE(%a6),%d1
11672 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11673 bne.b fsub_unfl_ena # yes
11674
11675fsub_unfl_dis:
11676 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11677
11678 lea FP_SCR0(%a6),%a0 # pass: result addr
11679 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11680 bsr.l unf_res # calculate default result
11681 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11682 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11683 mov.l (%sp)+,%d2 # restore d2
11684 rts
11685
# fsub_unfl_ena: underflow with UNFL or INEX enabled -> build the EXOP.
# The subtract is redone into fp1. For extended precision FPCR is loaded
# with L_SCR3 as is; for sgl/dbl (fsub_unfl_ena_sd) only the rounding-mode
# bits are kept.
#
# fsub_unfl_ena_cont: the fp1 result is written to FP_SCR0, its exponent is
# adjusted (minus the scale factor in d0, plus 0x6000, masked to 15 bits)
# with the sign preserved, and the value is reloaded into fp1 as the EXOP.
# d2 is used as scratch here; the caller's d2 is still saved on the stack
# and is restored by fsub_unfl_dis. Note fsub_unfl_dis then overwrites
# FP_SCR0 with fp0.
11686fsub_unfl_ena:
11687 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11688
11689 mov.l L_SCR3(%a6),%d1
11690 andi.b &0xc0,%d1 # is precision extended?
11691 bne.b fsub_unfl_ena_sd # no
11692
11693 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11694
11695fsub_unfl_ena_cont:
11696 fmov.l &0x0,%fpsr # clear FPSR
11697
11698 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11699
11700 fmov.l &0x0,%fpcr # clear FPCR
11701
11702 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
11703 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11704 mov.l %d1,%d2 # make a copy
11705 andi.l &0x7fff,%d1 # strip sign
11706 andi.w &0x8000,%d2 # keep old sign
11707 sub.l %d0,%d1 # add scale factor
11708 addi.l &0x6000,%d1 # add new bias
11709 andi.w &0x7fff,%d1 # clear top bit
11710 or.w %d2,%d1 # concat sgn,exp
11711 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11712 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11713 bra.w fsub_unfl_dis
11714
11715fsub_unfl_ena_sd:
11716 mov.l L_SCR3(%a6),%d1
11717 andi.b &0x30,%d1 # clear rnd prec
11718 fmov.l %d1,%fpcr # set FPCR
11719
11720 bra.b fsub_unfl_ena_cont
11721
11722#
11723# result is equal to the smallest normalized number in the selected precision
11724# if the precision is extended, this result could not have come from an
11725# underflow that rounded up.
11726#
# On entry the 12-byte result image is on top of the stack: exponent word at
# (%sp), hi(man) at 0x4(%sp), lo(man) at 0x8(%sp). Any test that rules out
# an underflow branches to fsub_normal.
11727fsub_may_unfl:
11728 mov.l L_SCR3(%a6),%d1
11729 andi.b &0xc0,%d1 # is precision extended?
11730 beq.w fsub_normal # yes; no underflow occurred
11731
11732 mov.l 0x4(%sp),%d1 # extract hi(man)
11733 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11734 bne.w fsub_normal # no; no underflow occurred
11735
11736 tst.l 0x8(%sp) # is lo(man) = 0x0?
11737 bne.w fsub_normal # no; no underflow occurred
11738
11739 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740 beq.w fsub_normal # no; no underflow occurred
11741
11742#
11743# ok, so now the result has an exponent equal to the smallest normalized
11744# exponent for the selected precision. also, the mantissa is equal to
11745# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746# g,r,s.
11747# now, we must determine whether the pre-rounded result was an underflow
11748# rounded "up" or a normalized number rounded "down".
11749# so, we do this by re-executing the subtract using RZ as the rounding mode
11750# and seeing if the new result is smaller or equal to the current result.
11751#
11752 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11753
11754 mov.l L_SCR3(%a6),%d1
11755 andi.b &0xc0,%d1 # keep rnd prec
11756 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11757 fmov.l %d1,%fpcr # set FPCR
11758 fmov.l &0x0,%fpsr # clear FPSR
11759
11760 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11761
11762 fmov.l &0x0,%fpcr # clear FPCR
11763
# fp0 is overwritten with its absolute value here; both exits reload fp0
# (fsub_normal from the stacked image, fsub_unfl by redoing the subtract).
11764 fabs.x %fp0 # compare absolute values
11765 fabs.x %fp1
11766 fcmp.x %fp0,%fp1 # is first result > second?
11767
11768 fbgt.w fsub_unfl # yes; it's an underflow
11769 bra.w fsub_normal # no; it's not an underflow
11770
11771##########################################################################
11772
11773#
11774# Sub: inputs are not both normalized; what are they?
11775#
# d1 = (DTAG << 3) | STAG as built by fsub. Fetch the 16-bit offset stored
# at tbl_fsub_op[d1] and jump to tbl_fsub_op + offset.
11776fsub_not_norm:
11777 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset
11778 jmp (tbl_fsub_op.b,%pc,%d1.w*1) # dispatch to handler
11779
# Dispatch table for fsub_not_norm: 48 16-bit offsets relative to
# tbl_fsub_op, indexed by (DTAG << 3) | STAG, i.e. 6 rows (destination
# operand type) of 8 columns (source operand type). Entry comments read
# "DST - SRC". The last two slots of each row hold offset 0 (no handler).
11780 swbeg &48
11781tbl_fsub_op:
11782 short fsub_norm - tbl_fsub_op # NORM - NORM
11783 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11784 short fsub_inf_src - tbl_fsub_op # NORM - INF
11785 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11786 short fsub_norm - tbl_fsub_op # NORM - DENORM
11787 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11788 short tbl_fsub_op - tbl_fsub_op #
11789 short tbl_fsub_op - tbl_fsub_op #
11790
11791 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11792 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11793 short fsub_inf_src - tbl_fsub_op # ZERO - INF
11794 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11795 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11796 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11797 short tbl_fsub_op - tbl_fsub_op #
11798 short tbl_fsub_op - tbl_fsub_op #
11799
11800 short fsub_inf_dst - tbl_fsub_op # INF - NORM
11801 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11802 short fsub_inf_2 - tbl_fsub_op # INF - INF
11803 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11804 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11805 short fsub_res_snan - tbl_fsub_op # INF - SNAN
11806 short tbl_fsub_op - tbl_fsub_op #
11807 short tbl_fsub_op - tbl_fsub_op #
11808
11809 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11810 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11811 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11812 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11813 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11814 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11815 short tbl_fsub_op - tbl_fsub_op #
11816 short tbl_fsub_op - tbl_fsub_op #
11817
11818 short fsub_norm - tbl_fsub_op # DENORM - NORM
11819 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11820 short fsub_inf_src - tbl_fsub_op # DENORM - INF
11821 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11822 short fsub_norm - tbl_fsub_op # DENORM - DENORM
11823 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11824 short tbl_fsub_op - tbl_fsub_op #
11825 short tbl_fsub_op - tbl_fsub_op #
11826
11827 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11828 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11829 short fsub_res_snan - tbl_fsub_op # SNAN - INF
11830 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11831 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11832 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11833 short tbl_fsub_op - tbl_fsub_op #
11834 short tbl_fsub_op - tbl_fsub_op #
11835
# Local trampolines: the dispatch table stores 16-bit offsets, so the NAN
# cases land here and take a long branch to the shared handlers.
11836fsub_res_qnan:
11837 bra.l res_qnan
11838fsub_res_snan:
11839 bra.l res_snan
11840
11841#
11842# both operands are ZEROes
11843#
# d0 = sign byte of src, d1 = sign byte of dst. The eor overwrites d0 with
# the XOR of the two signs; d1 still holds the dst sign byte afterwards.
11844fsub_zero_2:
11845 mov.b SRC_EX(%a0),%d0
11846 mov.b DST_EX(%a1),%d1
11847 eor.b %d1,%d0 # d0 = src sign ^ dst sign
11848 bpl.b fsub_zero_2_chk_rm # same signs; result depends on rnd mode
11849
11850# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# d0 is the sign XOR here and is always negative on this path, so the dst
# sign must be taken from d1; testing d0 would turn (+ZERO)-(-ZERO) into
# -ZERO.
11851 tst.b %d1 # is dst negative?
11852 bmi.b fsub_zero_2_rm # yes
11853 fmov.s &0x00000000,%fp0 # no; return +ZERO
11854 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11855 rts
11856
11857#
11858# the ZEROes have the same signs:
11859# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860# - -ZERO is returned in the case of RM.
11861#
11862fsub_zero_2_chk_rm:
11863 mov.b 3+L_SCR3(%a6),%d1 # fetch low byte of rnd info
11864 andi.b &0x30,%d1 # extract rnd mode
11865 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11866 beq.b fsub_zero_2_rm # yes
11867 fmov.s &0x00000000,%fp0 # no; return +ZERO
11868 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11869 rts
11870
# fsub_zero_2_rm: shared exit that returns -ZERO with the Z and N ccodes.
11871fsub_zero_2_rm:
11872 fmov.s &0x80000000,%fp0 # return -ZERO
11873 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11874 rts
11875
11876#
11877# one operand is a ZERO and the other is a DENORM or a NORM.
11878# scale the DENORM or NORM and jump to the regular fsub routine.
11879#
# fsub_zero_dst: dst is the ZERO. Copy the src operand into FP_SCR0, scale
# it with scale_to_zero_src(), clear FP_SCR1 (the dst slot) to +0 and enter
# the common subtract path.
11880fsub_zero_dst:
11881 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11882 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11883 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11884 bsr.l scale_to_zero_src # scale the operand
11885 clr.w FP_SCR1_EX(%a6)
11886 clr.l FP_SCR1_HI(%a6)
11887 clr.l FP_SCR1_LO(%a6)
11888 bra.w fsub_zero_entry # go execute fsub
11889
# fsub_zero_src: src is the ZERO. Copy the dst operand into FP_SCR1, scale
# it with scale_to_zero_dst(), clear FP_SCR0 (the src slot) to +0 and enter
# the common subtract path.
11890fsub_zero_src:
11891 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11892 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11893 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11894 bsr.l scale_to_zero_dst # scale the operand
11895 clr.w FP_SCR0_EX(%a6)
11896 clr.l FP_SCR0_HI(%a6)
11897 clr.l FP_SCR0_LO(%a6)
11898 bra.w fsub_zero_entry # go execute fsub
11899
11900#
11901# both operands are INFs. an OPERR will result if the INFs have the
11902# same signs. else, the src INF with its sign inverted is returned.
11903#
11904fsub_inf_2:
11905 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11906 mov.b DST_EX(%a1),%d1
11907 eor.b %d1,%d0
11908 bpl.l res_operr # weed out (+INF)-(+INF) and (-INF)-(-INF)
11909
11910# ok, so it's not an OPERR. but we do have to remember to return
11911# the src INF since that's where the 881/882 gets the j-bit.
# (execution falls through into fsub_inf_src)
11912
# fsub_inf_src: src is INF. The result is the src INF negated; the branch
# after fneg tests the FPU condition codes of the negated value.
11913fsub_inf_src:
11914 fmovm.x SRC(%a0),&0x80 # return src INF
11915 fneg.x %fp0 # invert sign
11916 fbge.w fsub_inf_done # sign is now positive
11917 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918 rts
11919
# fsub_inf_dst: dst is INF and src is one of {ZERO, DENORM, NORM}. The
# result is the dst INF unchanged.
11920fsub_inf_dst:
11921 fmovm.x DST(%a1),&0x80 # return dst INF
11922 tst.b DST_EX(%a1) # is INF negative?
11923 bpl.b fsub_inf_done # no
11924 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925 rts
11926
# fsub_inf_done: shared exit for a positive INF result already in fp0.
11927fsub_inf_done:
11928 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11929 rts
11930
11931#########################################################################
11932# XDEF **************************************************************** #
11933# fsqrt(): emulates the fsqrt instruction #
11934# fssqrt(): emulates the fssqrt instruction #
11935# fdsqrt(): emulates the fdsqrt instruction #
11936# #
11937# XREF **************************************************************** #
11938# scale_sqrt() - scale the source operand #
11939# unf_res() - return default underflow result #
11940# ovf_res() - return default overflow result #
11941# res_qnan_1op() - return QNAN result #
11942# res_snan_1op() - return SNAN result #
11943# #
11944# INPUT *************************************************************** #
11945# a0 = pointer to extended precision source operand #
11946# d0 rnd prec,mode #
11947# #
11948# OUTPUT ************************************************************** #
11949# fp0 = result #
11950# fp1 = EXOP (if exception occurred) #
11951# #
11952# ALGORITHM *********************************************************** #
11953# Handle NANs, infinities, and zeroes as special cases. Divide #
11954# norms/denorms into ext/sgl/dbl precision. #
11955# For norms/denorms, scale the exponents such that a sqrt #
11956# instruction won't cause an exception. Use the regular fsqrt to #
11957# compute a result. Check if the regular operands would have taken #
11958# an exception. If so, return the default overflow/underflow result #
11959# and return the EXOP if exceptions are enabled. Else, scale the #
11960# result operand to the proper exponent. #
11961# #
11962#########################################################################
11963
# fssqrt: force single rounding precision in the rnd info byte of d0 (the
# caller's rounding mode bits are kept), then run the common fsqrt code.
11964 global fssqrt
11965fssqrt:
11966 andi.b &0x30,%d0 # clear rnd prec
11967 ori.b &s_mode*0x10,%d0 # insert sgl precision
11968 bra.b fsqrt
11969
# fdsqrt: force double rounding precision in the rnd info byte of d0 (the
# caller's rounding mode bits are kept), then fall through into fsqrt.
11970 global fdsqrt
11971fdsqrt:
11972 andi.b &0x30,%d0 # clear rnd prec
11973 ori.b &d_mode*0x10,%d0 # insert dbl precision
11974
# fsqrt: common entry. d0 = rnd prec,mode (saved in L_SCR3), a0 = source
# operand. A source tag of zero falls through to fsqrt_norm; any other tag
# is handled by fsqrt_not_norm with the tag in d1.
11975 global fsqrt
11976fsqrt:
11977 mov.l %d0,L_SCR3(%a6) # store rnd info
11978 clr.w %d1
11979 mov.b STAG(%a6),%d1
11980 bne.w fsqrt_not_norm # optimize on non-norm input
11981
11982#
11983# SQUARE ROOT: norms and denorms ONLY!
11984#
# A negative operand is an operand error. For extended precision the square
# root is taken directly from the caller's operand and the resulting FPSR is
# merged into USER_FPSR; sgl/dbl go through fsqrt_not_ext.
# NOTE(review): unlike the sgl/dbl paths, this path returns without writing
# zero back to FPCR -- confirm the caller does not rely on FPCR being clear.
11985fsqrt_norm:
11986 tst.b SRC_EX(%a0) # is operand negative?
11987 bmi.l res_operr # yes
11988
11989 andi.b &0xc0,%d0 # is precision extended?
11990 bne.b fsqrt_not_ext # no; go handle sgl or dbl
11991
11992 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11993 fmov.l &0x0,%fpsr # clear FPSR
11994
11995 fsqrt.x (%a0),%fp0 # execute square root
11996
11997 fmov.l %fpsr,%d1
11998 or.l %d1,USER_FPSR(%a6) # set N,INEX
11999
12000 rts
12001
# fsqrt_denorm: same checks as fsqrt_norm, but for extended precision the
# operand is first copied to FP_SCR0 and scaled by scale_sqrt(), and the
# root is then computed and rescaled by the fsqrt_sd_normal path.
# (presumably entered for DENORM sources from the not-norm dispatch, which
# is outside this section.)
12002fsqrt_denorm:
12003 tst.b SRC_EX(%a0) # is operand negative?
12004 bmi.l res_operr # yes
12005
12006 andi.b &0xc0,%d0 # is precision extended?
12007 bne.b fsqrt_not_ext # no; go handle sgl or dbl
12008
12009 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12010 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12011 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12012
12013 bsr.l scale_sqrt # calculate scale factor
12014
12015 bra.w fsqrt_sd_normal
12016
12017#
12018# operand is either single or double
12019#
# d0 holds only the rounding-precision bits here (masked with 0xc0 by the
# caller). Single precision falls through to fsqrt_sgl.
12020fsqrt_not_ext:
12021 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12022 bne.w fsqrt_dbl
12023
12024#
12025# operand is to be rounded to single precision
12026#
# Copy the operand to FP_SCR0 and scale it; the scale factor returned in d0
# is compared against the single-precision underflow and overflow limits.
# If neither limit is hit, execution falls through into fsqrt_sd_normal.
12027fsqrt_sgl:
12028 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12029 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12030 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12031
12032 bsr.l scale_sqrt # calculate scale factor
12033
12034 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
12035 beq.w fsqrt_sd_may_unfl # maybe; go check
12036 bgt.w fsqrt_sd_unfl # yes; go handle underflow
12037 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
12038 beq.w fsqrt_sd_may_ovfl # maybe; go check
12039 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12040
12041#
12042# operand will NOT overflow or underflow when moved in to the fp reg file
12043#
# Take the square root of the scaled operand in FP_SCR0 under the user's
# rounding control and merge the resulting FPSR into USER_FPSR.
12044fsqrt_sd_normal:
12045 fmov.l &0x0,%fpsr # clear FPSR
12046 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12047
12048 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12049
12050 fmov.l %fpsr,%d1 # save FPSR
12051 fmov.l &0x0,%fpcr # clear FPCR
12052
12053 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12054
# fsqrt_sd_normal_exit: undo the scaling. Store fp0 to FP_SCR0, subtract the
# scale factor in d0 from its exponent (sign preserved) and reload fp0.
# d2 is used as scratch and is saved/restored around the sequence.
12055fsqrt_sd_normal_exit:
12056 mov.l %d2,-(%sp) # save d2
12057 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12058 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12059 mov.l %d1,%d2 # make a copy
12060 andi.l &0x7fff,%d1 # strip sign
12061 sub.l %d0,%d1 # add scale factor
12062 andi.w &0x8000,%d2 # keep old sign
12063 or.w %d1,%d2 # concat old sign,new exp
12064 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12065 mov.l (%sp)+,%d2 # restore d2
12066 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12067 rts
12068
12069#
12070# operand is to be rounded to double precision
12071#
# Copy the operand to FP_SCR0 and scale it; the scale factor returned in d0
# is compared against the double-precision underflow and overflow limits.
12072fsqrt_dbl:
12073 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12074 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12075 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12076
12077 bsr.l scale_sqrt # calculate scale factor
12078
12079 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
12080 beq.w fsqrt_sd_may_unfl # maybe; go check
12081 bgt.b fsqrt_sd_unfl # yes; go handle underflow
12082 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
12083 beq.w fsqrt_sd_may_ovfl # maybe; go check
12084 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12085 bra.w fsqrt_sd_normal # no; go handle normalized op
12086
12087# we're on the line here and the distinguishing characteristic is whether
12088# the exponent is 3fff or 3ffe. if it's 3fff (low exponent bit set), then
12089# it's a safe number; otherwise fall through to underflow.
12090fsqrt_sd_may_unfl:
12091 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12092 bne.w fsqrt_sd_normal # yes, so no underflow
12093
12094#
12095# operand WILL underflow when moved in to the fp register file
12096#
# Set the UNFL exception bit and take the square root of the scaled operand
# in FP_SCR0 with FPCR = round-to-zero only; the resulting FPSR is merged
# into USER_FPSR.
12097fsqrt_sd_unfl:
12098 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099
12100 fmov.l &rz_mode*0x10,%fpcr # set FPCR
12101 fmov.l &0x0,%fpsr # clear FPSR
12102
12103 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
12104
12105 fmov.l %fpsr,%d1 # save status
12106 fmov.l &0x0,%fpcr # clear FPCR
12107
12108 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12109
12110# if underflow or inexact is enabled, go calculate EXOP first.
12111 mov.b FPCR_ENABLE(%a6),%d1
12112 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12113 bne.b fsqrt_sd_unfl_ena # yes
12114
# fsqrt_sd_unfl_dis: store fp0 to FP_SCR0 and let unf_res() turn it into the
# default underflow result in place.
#   passes: a0 = &FP_SCR0, d1 = rnd prec,mode
#   uses:   d0 = ccode bits to OR into FPSR_CC
12115fsqrt_sd_unfl_dis:
12116 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12117
12118 lea FP_SCR0(%a6),%a0 # pass: result addr
12119 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12120 bsr.l unf_res # calculate default result
12121 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
12122 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12123 rts
12124
12125#
12126# operand will underflow AND underflow is enabled.
12127# Therefore, we must return the result rounded to extended precision.
12128#
# Build the EXOP in FP_SCR1: the mantissa is copied from FP_SCR0, and the
# exponent is FP_SCR0's exponent minus the scale factor in d0 plus 0x6000
# (masked to 15 bits), with the sign preserved. The value is loaded into fp1
# and the default-result path is resumed.
# NOTE(review): on the path from fsqrt_sd_unfl, FP_SCR0 has not yet been
# overwritten with fp0 (that happens in fsqrt_sd_unfl_dis), so the EXOP
# looks like it is built from the scaled source operand rather than from
# the square-root result -- confirm this is intended.
12129fsqrt_sd_unfl_ena:
12130 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12133
12134 mov.l %d2,-(%sp) # save d2
12135 mov.l %d1,%d2 # make a copy
12136 andi.l &0x7fff,%d1 # strip sign
12137 andi.w &0x8000,%d2 # keep old sign
12138 sub.l %d0,%d1 # subtract scale factor
12139 addi.l &0x6000,%d1 # add new bias
12140 andi.w &0x7fff,%d1 # clear top bit
12141 or.w %d2,%d1 # concat new sign,new exp
12142 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
12143 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12144 mov.l (%sp)+,%d2 # restore d2
12145 bra.b fsqrt_sd_unfl_dis
12146
12147#
12148# operand WILL overflow.
12149#
12150fsqrt_sd_ovfl:
12151 fmov.l &0x0,%fpsr # clear FPSR
12152 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12153
12154 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12155
12156 fmov.l &0x0,%fpcr # clear FPCR
12157 fmov.l %fpsr,%d1 # save FPSR
12158
12159 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12160
# common overflow tail; fsqrt_sd_may_ovfl also enters here once it has
# determined that the result overflowed.
12161fsqrt_sd_ovfl_tst:
12162 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163
12164 mov.b FPCR_ENABLE(%a6),%d1
12165 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12166 bne.b fsqrt_sd_ovfl_ena # yes
12167
12168#
12169# OVFL is not enabled; therefore, we must create the default result by
12170# calling ovf_res().
12171#
# fsqrt_sd_ovfl_ena also branches back here after loading the EXOP into fp1.
# d0 is overwritten below, so the scale factor is no longer available after
# this point.
12172fsqrt_sd_ovfl_dis:
12173 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12174 sne %d1 # set sign param accordingly
12175 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12176 bsr.l ovf_res # calculate default result
12177 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12178 fmovm.x (%a0),&0x80 # return default result in fp0 (a0 presumably set by ovf_res -- confirm)
12179 rts
12180
12181#
12182# OVFL is enabled.
12183# the INEX2 bit has already been updated by the round to the correct precision.
12184# now, round to extended(and don't alter the FPSR).
12185#
# Rewrites the exponent word of FP_SCR0 in place and loads the result into
# fp1 as the EXOP, then rejoins fsqrt_sd_ovfl_dis for the default result.
# d0 is used as the scale factor; it is set by code outside this view.
# NOTE(review): nothing in the visible overflow paths stores the fsqrt result
# back to FP_SCR0 before this point, so the EXOP mantissa appears to be the
# operand's rather than the square root's -- confirm this is intended.
12186fsqrt_sd_ovfl_ena:
12187 mov.l %d2,-(%sp) # save d2
12188 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12189 mov.l %d1,%d2 # make a copy
12190 andi.l &0x7fff,%d1 # strip sign
12191 andi.w &0x8000,%d2 # keep old sign
12192 sub.l %d0,%d1 # subtract scale factor
12193 subi.l &0x6000,%d1 # subtract bias
12194 andi.w &0x7fff,%d1 # keep the result within the 15-bit exponent field
12195 or.w %d2,%d1 # concat sign,exp
12196 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12197 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12198 mov.l (%sp)+,%d2 # restore d2
12199 bra.b fsqrt_sd_ovfl_dis # go build the default result in fp0
12200
12201#
12202# the move in MAY overflow. so...
12203#
# If the low exponent bit is set (exponent 0x3fff) the result is known to
# overflow. Otherwise compute the square root and compare the result
# against 1 to decide.
12204fsqrt_sd_may_ovfl:
12205 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12206 bne.w fsqrt_sd_ovfl # yes, so overflow
12207
12208 fmov.l &0x0,%fpsr # clear FPSR
12209 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12210
12211 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12212
12213 fmov.l %fpsr,%d1 # save status
12214 fmov.l &0x0,%fpcr # clear FPCR
12215
12216 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12217
12218 fmov.x %fp0,%fp1 # make a copy of result
12219 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
12220 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
12221
12222# no, it didn't overflow; we have correct result
12223 bra.w fsqrt_sd_normal_exit
12224
12225##########################################################################
12226
12227#
12228# input is not normalized; what is it?
12229#
# The low byte of d1 is compared against the operand type tags below
# (DENORM, ZERO, INF, SNAN); any other value is handled as a QNAN.
12230fsqrt_not_norm:
12231 cmpi.b %d1,&DENORM # weed out DENORM
12232 beq.w fsqrt_denorm
12233 cmpi.b %d1,&ZERO # weed out ZERO
12234 beq.b fsqrt_zero
12235 cmpi.b %d1,&INF # weed out INF
12236 beq.b fsqrt_inf
12237 cmpi.b %d1,&SNAN # weed out SNAN
12238 beq.l res_snan_1op
12239 bra.l res_qnan_1op # everything else: QNAN
12240
12241#
12242# fsqrt(+0) = +0
12243# fsqrt(-0) = -0
12244# fsqrt(+INF) = +INF
12245# fsqrt(-INF) = OPERR
12246#
# Both handlers read the sign from the first byte of the source operand
# at SRC_EX(%a0) and leave the result in fp0 with FPSR_CC(%a6) set to match.
12247fsqrt_zero:
12248 tst.b SRC_EX(%a0) # is ZERO positive or negative?
12249 bmi.b fsqrt_zero_m # negative
12250fsqrt_zero_p:
12251 fmov.s &0x00000000,%fp0 # return +ZERO
12252 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
12253 rts
12254fsqrt_zero_m:
12255 fmov.s &0x80000000,%fp0 # return -ZERO
12256 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12257 rts
12258
12259fsqrt_inf:
12260 tst.b SRC_EX(%a0) # is INF positive or negative?
12261 bmi.l res_operr # negative
12262fsqrt_inf_p:
12263 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
12264 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12265 rts
12266
12267#########################################################################
12268# XDEF **************************************************************** #
12269# fetch_dreg(): fetch register according to index in d1 #
12270# #
12271# XREF **************************************************************** #
12272# None #
12273# #
12274# INPUT *************************************************************** #
12275# d1 = index of register to fetch from #
12276# #
12277# OUTPUT ************************************************************** #
12278# d0 = value of register fetched #
12279# #
12280# ALGORITHM *********************************************************** #
12281# According to the index value in d1 which can range from zero #
12282# to fifteen, load the corresponding register file value (where #
12283# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12284# stack. The rest should still be in their original places. #
12285# #
12286#########################################################################
12287
12288# this routine leaves d1 intact for subsequent store_dreg calls.
# Dispatch: d1 indexes a table of 16-bit offsets (relative to tbl_fdreg);
# the selected offset is loaded into d0 and used for a pc-relative jump to
# the matching handler, which returns the register value in d0.
12289 global fetch_dreg
12290fetch_dreg:
12291 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 # d0 = handler offset for index d1
12292 jmp (tbl_fdreg.b,%pc,%d0.w*1) # jump to handler
12293
# indexes 0-7 select d0-d7; indexes 8-15 select a0-a7.
12294tbl_fdreg:
12295 short fdreg0 - tbl_fdreg
12296 short fdreg1 - tbl_fdreg
12297 short fdreg2 - tbl_fdreg
12298 short fdreg3 - tbl_fdreg
12299 short fdreg4 - tbl_fdreg
12300 short fdreg5 - tbl_fdreg
12301 short fdreg6 - tbl_fdreg
12302 short fdreg7 - tbl_fdreg
12303 short fdreg8 - tbl_fdreg
12304 short fdreg9 - tbl_fdreg
12305 short fdrega - tbl_fdreg
12306 short fdregb - tbl_fdreg
12307 short fdregc - tbl_fdreg
12308 short fdregd - tbl_fdreg
12309 short fdrege - tbl_fdreg
12310 short fdregf - tbl_fdreg
12311
12312fdreg0:
12313 mov.l EXC_DREGS+0x0(%a6),%d0 # d0: read saved copy from the frame
12314 rts
12315fdreg1:
12316 mov.l EXC_DREGS+0x4(%a6),%d0 # d1: read saved copy from the frame
12317 rts
12318fdreg2:
12319 mov.l %d2,%d0 # d2-d7: still live in the register file
12320 rts
12321fdreg3:
12322 mov.l %d3,%d0
12323 rts
12324fdreg4:
12325 mov.l %d4,%d0
12326 rts
12327fdreg5:
12328 mov.l %d5,%d0
12329 rts
12330fdreg6:
12331 mov.l %d6,%d0
12332 rts
12333fdreg7:
12334 mov.l %d7,%d0
12335 rts
12336fdreg8:
12337 mov.l EXC_DREGS+0x8(%a6),%d0 # a0: read saved copy from the frame
12338 rts
12339fdreg9:
12340 mov.l EXC_DREGS+0xc(%a6),%d0 # a1: read saved copy from the frame
12341 rts
12342fdrega:
12343 mov.l %a2,%d0 # a2-a5: still live in the register file
12344 rts
12345fdregb:
12346 mov.l %a3,%d0
12347 rts
12348fdregc:
12349 mov.l %a4,%d0
12350 rts
12351fdregd:
12352 mov.l %a5,%d0
12353 rts
12354fdrege:
12355 mov.l (%a6),%d0 # a6: saved value is read from (a6)
12356 rts
12357fdregf:
12358 mov.l EXC_A7(%a6),%d0 # a7: read saved copy from the frame
12359 rts
12360
12361#########################################################################
12362# XDEF **************************************************************** #
12363# store_dreg_l(): store longword to data register specified by d1 #
12364# #
12365# XREF **************************************************************** #
12366# None #
12367# #
12368# INPUT *************************************************************** #
12369# d0 = longword value to store #
12370# d1 = index of data register to store to #
12371# #
12372# OUTPUT ************************************************************** #
12373# (data register is updated) #
12374# #
12375# ALGORITHM *********************************************************** #
12376# According to the index value in d1, store the longword value #
12377# in d0 to the corresponding data register. D0/D1 are on the stack #
12378# while the rest are in their initial places. #
12379# #
12380#########################################################################
12381
12382 global store_dreg_l
# Dispatch through a table of 16-bit offsets indexed by d1 (0-7).
# d1 is overwritten with the handler offset, so the index is not preserved.
12383store_dreg_l:
12384 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = handler offset for index d1
12385 jmp (tbl_sdregl.b,%pc,%d1.w*1) # jump to handler
12386
12387tbl_sdregl:
12388 short sdregl0 - tbl_sdregl
12389 short sdregl1 - tbl_sdregl
12390 short sdregl2 - tbl_sdregl
12391 short sdregl3 - tbl_sdregl
12392 short sdregl4 - tbl_sdregl
12393 short sdregl5 - tbl_sdregl
12394 short sdregl6 - tbl_sdregl
12395 short sdregl7 - tbl_sdregl
12396
12397sdregl0:
12398 mov.l %d0,EXC_DREGS+0x0(%a6) # d0: update saved copy in the frame
12399 rts
12400sdregl1:
12401 mov.l %d0,EXC_DREGS+0x4(%a6) # d1: update saved copy in the frame
12402 rts
12403sdregl2:
12404 mov.l %d0,%d2 # d2-d7: write the live register directly
12405 rts
12406sdregl3:
12407 mov.l %d0,%d3
12408 rts
12409sdregl4:
12410 mov.l %d0,%d4
12411 rts
12412sdregl5:
12413 mov.l %d0,%d5
12414 rts
12415sdregl6:
12416 mov.l %d0,%d6
12417 rts
12418sdregl7:
12419 mov.l %d0,%d7
12420 rts
12421
12422#########################################################################
12423# XDEF **************************************************************** #
12424# store_dreg_w(): store word to data register specified by d1 #
12425# #
12426# XREF **************************************************************** #
12427# None #
12428# #
12429# INPUT *************************************************************** #
12430# d0 = word value to store #
12431# d1 = index of data register to store to #
12432# #
12433# OUTPUT ************************************************************** #
12434# (data register is updated) #
12435# #
12436# ALGORITHM *********************************************************** #
12437# According to the index value in d1, store the word value #
12438# in d0 to the corresponding data register. D0/D1 are on the stack #
12439# while the rest are in their initial places. #
12440# #
12441#########################################################################
12442
12443 global store_dreg_w
# Dispatch through a table of 16-bit offsets indexed by d1 (0-7).
# d1 is overwritten with the handler offset, so the index is not preserved.
# Only the low word of the destination is written.
12444store_dreg_w:
12445 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = handler offset for index d1
12446 jmp (tbl_sdregw.b,%pc,%d1.w*1) # jump to handler
12447
12448tbl_sdregw:
12449 short sdregw0 - tbl_sdregw
12450 short sdregw1 - tbl_sdregw
12451 short sdregw2 - tbl_sdregw
12452 short sdregw3 - tbl_sdregw
12453 short sdregw4 - tbl_sdregw
12454 short sdregw5 - tbl_sdregw
12455 short sdregw6 - tbl_sdregw
12456 short sdregw7 - tbl_sdregw
12457
12458sdregw0:
12459 mov.w %d0,2+EXC_DREGS+0x0(%a6) # d0: low word of saved copy (offset +2)
12460 rts
12461sdregw1:
12462 mov.w %d0,2+EXC_DREGS+0x4(%a6) # d1: low word of saved copy (offset +2)
12463 rts
12464sdregw2:
12465 mov.w %d0,%d2 # d2-d7: write the live register directly
12466 rts
12467sdregw3:
12468 mov.w %d0,%d3
12469 rts
12470sdregw4:
12471 mov.w %d0,%d4
12472 rts
12473sdregw5:
12474 mov.w %d0,%d5
12475 rts
12476sdregw6:
12477 mov.w %d0,%d6
12478 rts
12479sdregw7:
12480 mov.w %d0,%d7
12481 rts
12482
12483#########################################################################
12484# XDEF **************************************************************** #
12485# store_dreg_b(): store byte to data register specified by d1 #
12486# #
12487# XREF **************************************************************** #
12488# None #
12489# #
12490# INPUT *************************************************************** #
12491# d0 = byte value to store #
12492# d1 = index of data register to store to #
12493# #
12494# OUTPUT ************************************************************** #
12495# (data register is updated) #
12496# #
12497# ALGORITHM *********************************************************** #
12498# According to the index value in d1, store the byte value #
12499# in d0 to the corresponding data register. D0/D1 are on the stack #
12500# while the rest are in their initial places. #
12501# #
12502#########################################################################
12503
12504 global store_dreg_b
# Dispatch through a table of 16-bit offsets indexed by d1 (0-7).
# d1 is overwritten with the handler offset, so the index is not preserved.
# Only the low byte of the destination is written.
12505store_dreg_b:
12506 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = handler offset for index d1
12507 jmp (tbl_sdregb.b,%pc,%d1.w*1) # jump to handler
12508
12509tbl_sdregb:
12510 short sdregb0 - tbl_sdregb
12511 short sdregb1 - tbl_sdregb
12512 short sdregb2 - tbl_sdregb
12513 short sdregb3 - tbl_sdregb
12514 short sdregb4 - tbl_sdregb
12515 short sdregb5 - tbl_sdregb
12516 short sdregb6 - tbl_sdregb
12517 short sdregb7 - tbl_sdregb
12518
12519sdregb0:
12520 mov.b %d0,3+EXC_DREGS+0x0(%a6) # d0: low byte of saved copy (offset +3)
12521 rts
12522sdregb1:
12523 mov.b %d0,3+EXC_DREGS+0x4(%a6) # d1: low byte of saved copy (offset +3)
12524 rts
12525sdregb2:
12526 mov.b %d0,%d2 # d2-d7: write the live register directly
12527 rts
12528sdregb3:
12529 mov.b %d0,%d3
12530 rts
12531sdregb4:
12532 mov.b %d0,%d4
12533 rts
12534sdregb5:
12535 mov.b %d0,%d5
12536 rts
12537sdregb6:
12538 mov.b %d0,%d6
12539 rts
12540sdregb7:
12541 mov.b %d0,%d7
12542 rts
12543
12544#########################################################################
12545# XDEF **************************************************************** #
12546# inc_areg(): increment an address register by the value in d0 #
12547# #
12548# XREF **************************************************************** #
12549# None #
12550# #
12551# INPUT *************************************************************** #
12552# d0 = amount to increment by #
12553# d1 = index of address register to increment #
12554# #
12555# OUTPUT ************************************************************** #
12556# (address register is updated) #
12557# #
12558# ALGORITHM *********************************************************** #
12559# Typically used for an instruction w/ a post-increment <ea>, #
12560# this routine adds the increment value in d0 to the address register #
12561# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12562# in their original places. #
12563# For a7, if the increment amount is one, then we have to #
12564# increment by two. For any a7 update, set the mia7_flag so that if #
12565# an access error exception occurs later in emulation, this address #
12566# register update can be undone. #
12567# #
12568#########################################################################
12569
12570 global inc_areg
# Dispatch through a table of 16-bit offsets indexed by d1 (0-7 = a0-a7).
# d1 is overwritten with the handler offset, so the index is not preserved.
12571inc_areg:
12572 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset for index d1
12573 jmp (tbl_iareg.b,%pc,%d1.w*1) # jump to handler
12574
12575tbl_iareg:
12576 short iareg0 - tbl_iareg
12577 short iareg1 - tbl_iareg
12578 short iareg2 - tbl_iareg
12579 short iareg3 - tbl_iareg
12580 short iareg4 - tbl_iareg
12581 short iareg5 - tbl_iareg
12582 short iareg6 - tbl_iareg
12583 short iareg7 - tbl_iareg
12584
12585iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # a0: update saved copy in the frame
12586 rts
12587iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # a1: update saved copy in the frame
12588 rts
12589iareg2: add.l %d0,%a2 # a2-a5: update the live register directly
12590 rts
12591iareg3: add.l %d0,%a3
12592 rts
12593iareg4: add.l %d0,%a4
12594 rts
12595iareg5: add.l %d0,%a5
12596 rts
12597iareg6: add.l %d0,(%a6) # a6: update saved value at (a6)
12598 rts
12599iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) # flag the a7 update so it can be undone later
12600 cmpi.b %d0,&0x1 # is the increment one?
12601 beq.b iareg7b # yes; a7 is stepped by two instead
12602 add.l %d0,EXC_A7(%a6) # no; add the amount to the saved a7
12603 rts
12604iareg7b:
12605 addq.l &0x2,EXC_A7(%a6) # one-byte step on a7 is applied as two
12606 rts
12607
12608#########################################################################
12609# XDEF **************************************************************** #
12610# dec_areg(): decrement an address register by the value in d0 #
12611# #
12612# XREF **************************************************************** #
12613# None #
12614# #
12615# INPUT *************************************************************** #
12616# d0 = amount to decrement by #
12617# d1 = index of address register to decrement #
12618# #
12619# OUTPUT ************************************************************** #
12620# (address register is updated) #
12621# #
12622# ALGORITHM *********************************************************** #
12623# Typically used for an instruction w/ a pre-decrement <ea>, #
12624# this routine subtracts the decrement value in d0 from the address #
12625# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest #
12626# reside in their original places. #
12627# For a7, if the decrement amount is one, then we have to #
12628# decrement by two. For any a7 update, set the mda7_flag so that if #
12629# an access error exception occurs later in emulation, this address #
12630# register update can be undone. #
12631# #
12632#########################################################################
12633
12634 global dec_areg
# Dispatch through a table of 16-bit offsets indexed by d1 (0-7 = a0-a7).
# d1 is overwritten with the handler offset, so the index is not preserved.
12635dec_areg:
12636 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset for index d1
12637 jmp (tbl_dareg.b,%pc,%d1.w*1) # jump to handler
12638
12639tbl_dareg:
12640 short dareg0 - tbl_dareg
12641 short dareg1 - tbl_dareg
12642 short dareg2 - tbl_dareg
12643 short dareg3 - tbl_dareg
12644 short dareg4 - tbl_dareg
12645 short dareg5 - tbl_dareg
12646 short dareg6 - tbl_dareg
12647 short dareg7 - tbl_dareg
12648
12649dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) # a0: update saved copy in the frame
12650 rts
12651dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) # a1: update saved copy in the frame
12652 rts
12653dareg2: sub.l %d0,%a2 # a2-a5: update the live register directly
12654 rts
12655dareg3: sub.l %d0,%a3
12656 rts
12657dareg4: sub.l %d0,%a4
12658 rts
12659dareg5: sub.l %d0,%a5
12660 rts
12661dareg6: sub.l %d0,(%a6) # a6: update saved value at (a6)
12662 rts
12663dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) # flag the a7 update so it can be undone later
12664 cmpi.b %d0,&0x1 # is the decrement one?
12665 beq.b dareg7b # yes; a7 is stepped by two instead
12666 sub.l %d0,EXC_A7(%a6) # no; subtract the amount from the saved a7
12667 rts
12668dareg7b:
12669 subq.l &0x2,EXC_A7(%a6) # one-byte step on a7 is applied as two
12670 rts
12671
12672##############################################################################
12673
12674#########################################################################
12675# XDEF **************************************************************** #
12676# load_fpn1(): load FP register value into FP_SRC(a6). #
12677# #
12678# XREF **************************************************************** #
12679# None #
12680# #
12681# INPUT *************************************************************** #
12682# d0 = index of FP register to load #
12683# #
12684# OUTPUT ************************************************************** #
12685# FP_SRC(a6) = value loaded from FP register file #
12686# #
12687# ALGORITHM *********************************************************** #
12688# Using the index in d0, load FP_SRC(a6) with a number from the #
12689# FP register file. #
12690# #
12691#########################################################################
12692
12693 global load_fpn1
# Dispatch through a table of 16-bit offsets indexed by d0 (0-7 = fp0-fp7).
# d0 is overwritten with the handler offset. Every handler also leaves
# a0 pointing at FP_SRC(a6).
12694load_fpn1:
12695 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = handler offset for index d0
12696 jmp (tbl_load_fpn1.b,%pc,%d0.w*1) # jump to handler
12697
12698tbl_load_fpn1:
12699 short load_fpn1_0 - tbl_load_fpn1
12700 short load_fpn1_1 - tbl_load_fpn1
12701 short load_fpn1_2 - tbl_load_fpn1
12702 short load_fpn1_3 - tbl_load_fpn1
12703 short load_fpn1_4 - tbl_load_fpn1
12704 short load_fpn1_5 - tbl_load_fpn1
12705 short load_fpn1_6 - tbl_load_fpn1
12706 short load_fpn1_7 - tbl_load_fpn1
12707
# fp0 and fp1 are copied from their saved images EXC_FP0/EXC_FP1
# as three longwords (12 bytes).
12708load_fpn1_0:
12709 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12710 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12711 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712 lea FP_SRC(%a6), %a0
12713 rts
12714load_fpn1_1:
12715 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12716 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718 lea FP_SRC(%a6), %a0
12719 rts
# fp2-fp7 are stored straight from the FP register file; the fmovm.x
# register mask selects one register (0x20 = fp2 ... 0x01 = fp7).
12720load_fpn1_2:
12721 fmovm.x &0x20, FP_SRC(%a6)
12722 lea FP_SRC(%a6), %a0
12723 rts
12724load_fpn1_3:
12725 fmovm.x &0x10, FP_SRC(%a6)
12726 lea FP_SRC(%a6), %a0
12727 rts
12728load_fpn1_4:
12729 fmovm.x &0x08, FP_SRC(%a6)
12730 lea FP_SRC(%a6), %a0
12731 rts
12732load_fpn1_5:
12733 fmovm.x &0x04, FP_SRC(%a6)
12734 lea FP_SRC(%a6), %a0
12735 rts
12736load_fpn1_6:
12737 fmovm.x &0x02, FP_SRC(%a6)
12738 lea FP_SRC(%a6), %a0
12739 rts
12740load_fpn1_7:
12741 fmovm.x &0x01, FP_SRC(%a6)
12742 lea FP_SRC(%a6), %a0
12743 rts
12744
12745#############################################################################
12746
12747#########################################################################
12748# XDEF **************************************************************** #
12749# load_fpn2(): load FP register value into FP_DST(a6). #
12750# #
12751# XREF **************************************************************** #
12752# None #
12753# #
12754# INPUT *************************************************************** #
12755# d0 = index of FP register to load #
12756# #
12757# OUTPUT ************************************************************** #
12758# FP_DST(a6) = value loaded from FP register file #
12759# #
12760# ALGORITHM *********************************************************** #
12761# Using the index in d0, load FP_DST(a6) with a number from the #
12762# FP register file. #
12763# #
12764#########################################################################
12765
12766 global load_fpn2
# Dispatch through a table of 16-bit offsets indexed by d0 (0-7 = fp0-fp7).
# d0 is overwritten with the handler offset. Every handler also leaves
# a0 pointing at FP_DST(a6).
12767load_fpn2:
12768 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = handler offset for index d0
12769 jmp (tbl_load_fpn2.b,%pc,%d0.w*1) # jump to handler
12770
12771tbl_load_fpn2:
12772 short load_fpn2_0 - tbl_load_fpn2
12773 short load_fpn2_1 - tbl_load_fpn2
12774 short load_fpn2_2 - tbl_load_fpn2
12775 short load_fpn2_3 - tbl_load_fpn2
12776 short load_fpn2_4 - tbl_load_fpn2
12777 short load_fpn2_5 - tbl_load_fpn2
12778 short load_fpn2_6 - tbl_load_fpn2
12779 short load_fpn2_7 - tbl_load_fpn2
12780
# fp0 and fp1 are copied from their saved images EXC_FP0/EXC_FP1
# as three longwords (12 bytes).
12781load_fpn2_0:
12782 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
12783 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
12784 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785 lea FP_DST(%a6), %a0
12786 rts
12787load_fpn2_1:
12788 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
12789 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791 lea FP_DST(%a6), %a0
12792 rts
# fp2-fp7 are stored straight from the FP register file; the fmovm.x
# register mask selects one register (0x20 = fp2 ... 0x01 = fp7).
12793load_fpn2_2:
12794 fmovm.x &0x20, FP_DST(%a6)
12795 lea FP_DST(%a6), %a0
12796 rts
12797load_fpn2_3:
12798 fmovm.x &0x10, FP_DST(%a6)
12799 lea FP_DST(%a6), %a0
12800 rts
12801load_fpn2_4:
12802 fmovm.x &0x08, FP_DST(%a6)
12803 lea FP_DST(%a6), %a0
12804 rts
12805load_fpn2_5:
12806 fmovm.x &0x04, FP_DST(%a6)
12807 lea FP_DST(%a6), %a0
12808 rts
12809load_fpn2_6:
12810 fmovm.x &0x02, FP_DST(%a6)
12811 lea FP_DST(%a6), %a0
12812 rts
12813load_fpn2_7:
12814 fmovm.x &0x01, FP_DST(%a6)
12815 lea FP_DST(%a6), %a0
12816 rts
12817
12818#############################################################################
12819
12820#########################################################################
12821# XDEF **************************************************************** #
12822# store_fpreg(): store an fp value to the fpreg designated d0. #
12823# #
12824# XREF **************************************************************** #
12825# None #
12826# #
12827# INPUT *************************************************************** #
12828# fp0 = extended precision value to store #
12829# d0 = index of floating-point register #
12830# #
12831# OUTPUT ************************************************************** #
12832# None #
12833# #
12834# ALGORITHM *********************************************************** #
12835# Store the value in fp0 to the FP register designated by the #
12836# value in d0. The FP number can be DENORM or SNAN so we have to be #
12837# careful that we don't take an exception here. #
12838# #
12839#########################################################################
12840
12841 global store_fpreg
# Dispatch through a table of 16-bit offsets indexed by d0 (0-7 = fp0-fp7).
# d0 is overwritten with the handler offset.
12842store_fpreg:
12843 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = handler offset for index d0
12844 jmp (tbl_store_fpreg.b,%pc,%d0.w*1) # jump to handler
12845
12846tbl_store_fpreg:
12847 short store_fpreg_0 - tbl_store_fpreg
12848 short store_fpreg_1 - tbl_store_fpreg
12849 short store_fpreg_2 - tbl_store_fpreg
12850 short store_fpreg_3 - tbl_store_fpreg
12851 short store_fpreg_4 - tbl_store_fpreg
12852 short store_fpreg_5 - tbl_store_fpreg
12853 short store_fpreg_6 - tbl_store_fpreg
12854 short store_fpreg_7 - tbl_store_fpreg
12855
# destinations fp0/fp1: fp0 is written to the saved images EXC_FP0/EXC_FP1
# rather than to a live register.
12856store_fpreg_0:
12857 fmovm.x &0x80, EXC_FP0(%a6)
12858 rts
12859store_fpreg_1:
12860 fmovm.x &0x80, EXC_FP1(%a6)
12861 rts
# destinations fp2-fp7: the value is bounced through the stack with fmovm.x,
# pushed in the first instruction and popped into the register selected by
# the mask in the second (0x20 = fp2 ... 0x01 = fp7).
# NOTE(review): in the -(sp) predecrement form the fmovm register mask is
# bit-reversed, so &0x01 in the push is taken to select fp0 (the documented
# input) -- confirm against the FMOVEM description.
12862store_fpreg_2:
12863 fmovm.x &0x01, -(%sp)
12864 fmovm.x (%sp)+, &0x20
12865 rts
12866store_fpreg_3:
12867 fmovm.x &0x01, -(%sp)
12868 fmovm.x (%sp)+, &0x10
12869 rts
12870store_fpreg_4:
12871 fmovm.x &0x01, -(%sp)
12872 fmovm.x (%sp)+, &0x08
12873 rts
12874store_fpreg_5:
12875 fmovm.x &0x01, -(%sp)
12876 fmovm.x (%sp)+, &0x04
12877 rts
12878store_fpreg_6:
12879 fmovm.x &0x01, -(%sp)
12880 fmovm.x (%sp)+, &0x02
12881 rts
12882store_fpreg_7:
12883 fmovm.x &0x01, -(%sp)
12884 fmovm.x (%sp)+, &0x01
12885 rts
12886
12887#########################################################################
12888# XDEF **************************************************************** #
12889# get_packed(): fetch a packed operand from memory and then #
12890# convert it to a floating-point binary number. #
12891# #
12892# XREF **************************************************************** #
12893# _dcalc_ea() - calculate the correct <ea> #
12894# _dmem_read() - fetch the packed operand from memory #
12895# facc_in_x() - the fetch failed so jump to special exit code #
12896# decbin() - convert packed to binary extended precision #
12897# #
12898# INPUT *************************************************************** #
12899# None #
12900# #
12901# OUTPUT ************************************************************** #
12902# If no failure on _dmem_read(): #
12903# FP_SRC(a6) = packed operand now as a binary FP number #
12904# #
12905# ALGORITHM *********************************************************** #
12906# Get the correct <ea> which is the value on the exception stack #
12907# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12908# Then, fetch the operand from memory. If the fetch fails, exit #
12909# through facc_in_x(). #
12910# If the packed operand is a ZERO,NAN, or INF, convert it to #
12911# its binary representation here. Else, call decbin() which will #
12912# convert the packed value to an extended precision binary value. #
12913# #
12914#########################################################################
12915
12916# the stacked <ea> for packed is correct except for -(An).
12917# the base reg must be updated for both -(An) and (An)+.
# Flow: compute the <ea>, read 12 bytes into FP_SRC(a6), then classify the
# operand. INF/NAN and ZERO operands return with FP_SRC(a6) left exactly as
# read; anything else is converted by decbin() and the result is stored
# back into FP_SRC(a6).
12918 global get_packed
12919get_packed:
12920 mov.l &0xc,%d0 # packed is 12 bytes
12921 bsr.l _dcalc_ea # fetch <ea>; correct An
12922
12923 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
12924 mov.l &0xc,%d0 # pass: 12 bytes
12925 bsr.l _dmem_read # read packed operand
12926
12927 tst.l %d1 # did dfetch fail?
12928 bne.l facc_in_x # yes; branch (not a call) to the access-error exit
12929
12930# The packed operand is an INF or a NAN if the exponent field is all ones.
12931 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp (15 bits following the sign bit)
12932 cmpi.w %d0,&0x7fff # INF or NAN?
12933 bne.b gp_try_zero # no
12934 rts # operand is an INF or NAN
12935
12936# The packed operand is a zero if the mantissa is all zero, else it's
12937# a normal packed op.
12938gp_try_zero:
12939 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
12940 andi.b &0x0f,%d0 # clear all but last nybble (the integer digit)
12941 bne.b gp_not_spec # not a zero
12942 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
12943 bne.b gp_not_spec # not a zero
12944 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
12945 bne.b gp_not_spec # not a zero
12946 rts # operand is a ZERO
12947gp_not_spec:
12948 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
12949 bsr.l decbin # convert to extended
12950 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
12951 rts
12952
12953#########################################################################
12954# decbin(): Converts normalized packed bcd value pointed to by register #
12955# a0 to extended-precision value in fp0. #
12956# #
12957# INPUT *************************************************************** #
12958# a0 = pointer to normalized packed bcd value #
12959# #
12960# OUTPUT ************************************************************** #
12961# fp0 = exact fp representation of the packed bcd value. #
12962# #
12963# ALGORITHM *********************************************************** #
12964# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12965# and NaN operands are dispatched without entering this routine) #
12966# value in 68881/882 format at location (a0). #
12967# #
12968# A1. Convert the bcd exponent to binary by successive adds and #
12969# muls. Set the sign according to SE. Subtract 16 to compensate #
12970# for the mantissa which is to be interpreted as 17 integer #
12971# digits, rather than 1 integer and 16 fraction digits. #
12972# Note: this operation can never overflow. #
12973# #
12974# A2. Convert the bcd mantissa to binary by successive #
12975# adds and muls in FP0. Set the sign according to SM. #
12976# The mantissa digits will be converted with the decimal point #
12977# assumed following the least-significant digit. #
12978# Note: this operation can never overflow. #
12979# #
12980# A3. Count the number of leading/trailing zeros in the #
12981# bcd string. If SE is positive, count the leading zeros; #
12982# if negative, count the trailing zeros. Set the adjusted #
12983# exponent equal to the exponent from A1 and the zero count #
12984# added if SM = 1 and subtracted if SM = 0. Scale the #
12985# mantissa the equivalent of forcing in the bcd value: #
12986# #
12987# SM = 0 a non-zero digit in the integer position #
12988# SM = 1 a non-zero digit in Mant0, lsd of the fraction #
12989# #
12990# this will insure that any value, regardless of its #
12991# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
12992# consistently. #
12993# #
12994# A4. Calculate the factor 10^exp in FP1 using a table of #
12995# 10^(2^n) values. To reduce the error in forming factors #
12996# greater than 10^27, a directed rounding scheme is used with #
12997# tables rounded to RN, RM, and RP, according to the table #
12998# in the comments of the pwrten section. #
12999# #
13000# A5. Form the final binary number by scaling the mantissa by #
13001# the exponent factor. This is done by multiplying the #
13002# mantissa in FP0 by the factor in FP1 if the adjusted #
13003# exponent sign is positive, and dividing FP0 by FP1 if #
13004# it is negative. #
13005# #
13006# Clean up and return. Check if the final mul or div was inexact. #
13007# If so, set INEX1 in USER_FPSR. #
13008# #
13009#########################################################################
13010
13011#
13012# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013# to nearest, minus, and plus, respectively. The tables include
13014# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13015# is required until the power is greater than 27, however, all
13016# tables include the first 5 for ease of indexing.
13017#
# NOTE(review): RTABLE is not referenced in the visible part of decbin, and
# the comment block above describes the PTENRN/PTENRM/PTENRP tables rather
# than this one. Presumably a rounding-mode lookup for the power-of-ten
# step -- confirm against the code that indexes it.
13018RTABLE:
13019 byte 0,0,0,0
13020 byte 2,3,2,3
13021 byte 2,3,3,2
13022 byte 3,2,2,3
13023
# Mantissa digit loop parameters (decbin md2b loop):
# FNIBS is the dbf count per longword (7 => 8 digits), FSTRT the bit offset
# of the first digit within the longword.
13024 set FNIBS,7
13025 set FSTRT,0
13026
# Exponent digit loop parameters (decbin e_gd loop):
# ESTRT is the bit offset of the first exponent digit in the first longword,
# EDIGITS the dbf count (2 => 3 digits).
13027 set ESTRT,4
13028 set EDIGITS,2
13029
13030 global decbin
13031decbin:
13032 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
13035
13036 lea FP_SCR0(%a6),%a0
13037
13038 movm.l &0x3c00,-(%sp) # save d2-d5
13039 fmovm.x &0x1,-(%sp) # save fp1
13040#
13041# Calculate exponent:
13042# 1. Copy bcd value in memory for use as a working copy.
13043# 2. Calculate absolute value of exponent in d1 by mul and add.
13044# 3. Correct for exponent sign.
13045# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046# (i.e., all digits assumed left of the decimal point.)
13047#
13048# Register usage:
13049#
13050# calc_e:
13051# (*) d0: temp digit storage
13052# (*) d1: accumulator for binary exponent
13053# (*) d2: digit count
13054# (*) d3: offset pointer
13055# ( ) d4: first word of bcd
13056# ( ) a0: pointer to working bcd value
13057# ( ) a6: pointer to original bcd value
13058# (*) FP_SCR1: working copy of original bcd value
13059# (*) L_SCR1: copy of original exponent word
13060#
13061calc_e:
13062 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
13063 mov.l &ESTRT,%d3 # counter to pick up digits
13064 mov.l (%a0),%d4 # get first word of bcd
13065 clr.l %d1 # zero d1 for accumulator
13066e_gd:
13067 mulu.l &0xa,%d1 # mul partial product by one digit place
13068 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
13069 add.l %d0,%d1 # d1 = d1 + d0
13070 addq.b &4,%d3 # advance d3 to the next digit
13071 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
13072 btst &30,%d4 # get SE
13073 beq.b e_pos # don't negate if pos
13074 neg.l %d1 # negate before subtracting
13075e_pos:
13076 sub.l &16,%d1 # sub to compensate for shift of mant
13077 bge.b e_save # if still pos, do not neg
13078 neg.l %d1 # now negative, make pos and set SE
13079 or.l &0x40000000,%d4 # set SE in d4,
13080 or.l &0x40000000,(%a0) # and in working bcd
13081e_save:
13082 mov.l %d1,-(%sp) # save exp on stack
13083#
13084#
13085# Calculate mantissa:
13086# 1. Calculate absolute value of mantissa in fp0 by mul and add.
13087# 2. Correct for mantissa sign.
13088# (i.e., all digits assumed left of the decimal point.)
13089#
13090# Register usage:
13091#
13092# calc_m:
13093# (*) d0: temp digit storage
13094# (*) d1: lword counter
13095# (*) d2: digit count
13096# (*) d3: offset pointer
13097# ( ) d4: words 2 and 3 of bcd
13098# ( ) a0: pointer to working bcd value
13099# ( ) a6: pointer to original bcd value
13100# (*) fp0: mantissa accumulator
13101# ( ) FP_SCR1: working copy of original bcd value
13102# ( ) L_SCR1: copy of original exponent word
13103#
13104calc_m:
# Build the mantissa in fp0 as an integer: start from the integer digit in
# the first lword, then fold in every digit of lwords 2 and 3 using
# fp0 = fp0*10 + digit, and finally negate fp0 if the mantissa sign is set.
13105 mov.l &1,%d1 # lword index into the bcd value, init to 1
13106 fmov.s &0x00000000,%fp0 # accumulator = 0.0
13107#
13108#
13109# Since the packed number has a long word between the first & second parts,
13110# get the integer digit then skip down & get the rest of the
13111# mantissa. We will unroll the loop once.
13112#
13113 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
13114 fadd.b %d0,%fp0 # add digit to sum in fp0
13115#
13116#
13117# Get the rest of the mantissa.
13118#
13119loadlw:
13120 mov.l (%a0,%d1.L*4),%d4 # load mantissa longword d1 into d4
13121 mov.l &FSTRT,%d3 # d3 = bit offset of first digit in this lword
13122 mov.l &FNIBS,%d2 # reset dbf digit count for this lword
13123md2b:
13124 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
13125 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
13126 fadd.b %d0,%fp0 # fp0 = fp0 + digit
13127#
13128#
13129# If all the digits (8) in that long word have been converted (d2=0),
13130# then inc d1 (=2) to point to the next long word and reset d3 to 0
13131# to initialize the digit offset, and set d2 to 7 for the digit count;
13132# else continue with this long word.
13133#
13134 addq.b &4,%d3 # advance d3 to the next digit
13135 dbf.w %d2,md2b # check for last digit in this lw
13136nextlw:
13137 addq.l &1,%d1 # inc lw index in mantissa
13138 cmp.l %d1,&2 # test for last lw
13139 ble.b loadlw # index <= 2: another lword to convert
13140#
13141# Check the sign of the mant and make the value in fp0 the same sign.
13142#
13143m_sign:
13144 btst &31,(%a0) # test sign of the mantissa
13145 beq.b ap_st_z # if clear, go to append/strip zeros
13146 fneg.x %fp0 # if set, negate fp0 (falls through to ap_st_z)
13147#
13148# Append/strip zeros:
13149#
13150# For adjusted exponents which have an absolute value greater than 27*,
13151# this routine calculates the amount needed to normalize the mantissa
13152# for the adjusted exponent. That number is subtracted from the exp
13153# if the exp was positive, and added if it was negative. The purpose
13154# of this is to reduce the value of the exponent and the possibility
13155# of error in calculation of pwrten.
13156#
13157# 1. Branch on the sign of the adjusted exponent.
13158# 2p.(positive exp)
13159# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160# 3. Add one for each zero encountered until a non-zero digit.
13161# 4. Subtract the count from the exp.
13162# 5. Check if the exp has crossed zero in #3 above; make the exp abs
13163# and set SE.
13164# 6. Multiply the mantissa by 10**count.
13165# 2n.(negative exp)
13166# 2. Check the digits in lwords 3 and 2 in descending order.
13167# 3. Add one for each zero encountered until a non-zero digit.
13168# 4. Add the count to the exp.
13169# 5. Check if the exp has crossed zero in #3 above; clear SE.
13170# 6. Divide the mantissa by 10**count.
13171#
13172# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
13173# any adjustment due to append/strip zeros will drive the resultant
13174# exponent towards zero. Since all pwrten constants with a power
13175# of 27 or less are exact, there is no need to use this routine to
13176# attempt to lessen the resultant exponent.
13177#
13178# Register usage:
13179#
13180# ap_st_z:
13181# (*) d0: temp digit storage
13182# (*) d1: zero count
13183# (*) d2: digit count
13184# (*) d3: offset pointer
13185# ( ) d4: first word of bcd
13186# (*) d5: lword counter
13187# ( ) a0: pointer to working bcd value
13188# ( ) FP_SCR1: working copy of original bcd value
13189# ( ) L_SCR1: copy of original exponent word
13190#
13191#
13192# First check the absolute value of the exponent to see if this
13193# routine is necessary. If so, then check the sign of the exponent
13194# and do append (+) or strip (-) zeros accordingly.
13195# This section handles a positive adjusted exponent.
13196#
13197ap_st_z:
# Positive-exponent side of append/strip zeros: skip everything when
# abs(expA) <= 27; otherwise count leading zero digits of the mantissa
# (M16, then lwords 2 and 3), subtract that count from the exponent and
# multiply fp0 by 10**count.
13198 mov.l (%sp),%d1 # load expA (pushed by calc_e) for range test
13199 cmp.l %d1,&27 # compare expA with 27
13200 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
13201 btst &30,(%a0) # check sign of exp
13202 bne.b ap_st_n # if neg, go to neg side
13203 clr.l %d1 # zero count reg
13204 mov.l (%a0),%d4 # load lword 1 to d4
13205 bfextu %d4{&28:&4},%d0 # get M16 in d0
13206 bne.b ap_p_fx # if M16 is non-zero, go fix exp
13207 addq.l &1,%d1 # M16 is zero: inc zero count
13208 mov.l &1,%d5 # init lword counter
13209 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
13210 bne.b ap_p_cl # if lw 2 is non-zero, go scan its digits
13211 addq.l &8,%d1 # lw 2 is all zeros: inc count by 8
13212 addq.l &1,%d5 # inc lword counter
13213 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
13214ap_p_cl:
13215 clr.l %d3 # init bit offset to the most significant digit
13216 mov.l &7,%d2 # init digit counter
13217ap_p_gd:
13218 bfextu %d4{%d3:&4},%d0 # get digit
13219 bne.b ap_p_fx # if non-zero, go to fix exp
13220 addq.l &4,%d3 # point to next digit
13221 addq.l &1,%d1 # inc zero count
13222 dbf.w %d2,ap_p_gd # get next digit
13223ap_p_fx:
13224 mov.l %d1,%d0 # copy zero count to d0
13225 mov.l (%sp),%d1 # get adjusted exp from memory
13226 sub.l %d0,%d1 # subtract count from exp
13227 bge.b ap_p_fm # if still >= 0, go build the mantissa multiplier
13228 neg.l %d1 # now its neg; get abs
13229 mov.l (%a0),%d4 # load lword 1 to d4
13230 or.l &0x40000000,%d4 # and set SE in d4
13231 or.l &0x40000000,(%a0) # and in memory
13232#
13233# Calculate the mantissa multiplier to compensate for the stripping of
13234# zeros from the mantissa.
13235#
13236ap_p_fm:
13237 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13238 clr.l %d3 # init table index
13239 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13240 mov.l &3,%d2 # NOTE: d2 is not read again before pwrten overwrites it
13241ap_p_el:
13242 asr.l &1,%d0 # shift lsb of count into carry
13243 bcc.b ap_p_en # bit is 0: skip the multiply
13244 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13245ap_p_en:
13246 add.l &12,%d3 # advance d3 to the next 12-byte table entry
13247 tst.l %d0 # check if d0 is zero
13248 bne.b ap_p_el # if not, get next bit
13249 fmul.x %fp1,%fp0 # mul mantissa by 10**(zero count)
13250 bra.b pwrten # go calc pwrten
13251#
13252# This section handles a negative adjusted exponent.
13253#
13254ap_st_n:
# Negative-exponent side of append/strip zeros: count trailing zero digits
# of the mantissa (lword 3, then lword 2), reduce abs(exp) by that count and
# divide fp0 by 10**count. Falls through to pwrten.
13255 clr.l %d1 # clr zero counter
13256 mov.l &2,%d5 # set up d5 to point to lword 3
13257 mov.l (%a0,%d5.L*4),%d4 # get lword 3
13258 bne.b ap_n_cl # if not zero, check digits
13259 sub.l &1,%d5 # dec d5 to point to lword 2
13260 addq.l &8,%d1 # lword 3 is all zeros: inc counter by 8
13261 mov.l (%a0,%d5.L*4),%d4 # get lword 2
13262ap_n_cl:
13263 mov.l &28,%d3 # bit offset of the least significant digit
13264 mov.l &7,%d2 # init digit counter
13265ap_n_gd:
13266 bfextu %d4{%d3:&4},%d0 # get digit
13267 bne.b ap_n_fx # if non-zero, go to exp fix
13268 subq.l &4,%d3 # point to previous digit
13269 addq.l &1,%d1 # inc zero count
13270 dbf.w %d2,ap_n_gd # get next digit
13271ap_n_fx:
13272 mov.l %d1,%d0 # copy zero count to d0
13273 mov.l (%sp),%d1 # get adjusted exp (stored as abs value) from memory
13274 sub.l %d0,%d1 # subtract count from abs(exp)
13275 bgt.b ap_n_fm # if still > 0, go fix mantissa
13276 neg.l %d1 # take abs of exp and clr SE
13277 mov.l (%a0),%d4 # load lword 1 to d4
13278 and.l &0xbfffffff,%d4 # and clr SE in d4
13279 and.l &0xbfffffff,(%a0) # and in memory
13280#
13281# Calculate the mantissa multiplier to compensate for the appending of
13282# zeros to the mantissa.
13283#
13284ap_n_fm:
13285 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13286 clr.l %d3 # init table index
13287 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13288 mov.l &3,%d2 # NOTE: d2 is not read again before pwrten overwrites it
13289ap_n_el:
13290 asr.l &1,%d0 # shift lsb of count into carry
13291 bcc.b ap_n_en # bit is 0: skip the multiply
13292 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13293ap_n_en:
13294 add.l &12,%d3 # advance d3 to the next 12-byte table entry
13295 tst.l %d0 # check if d0 is zero
13296 bne.b ap_n_el # if not, get next bit
13297 fdiv.x %fp1,%fp0 # div mantissa by 10**(zero count)
13298#
13299#
13300# Calculate power-of-ten factor from adjusted and shifted exponent.
13301#
13302# Register usage:
13303#
13304# pwrten:
13305# (*) d0: temp
13306# ( ) d1: exponent
13307# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308# (*) d3: FPCR work copy
13309# ( ) d4: first word of bcd
13310# (*) a1: RTABLE pointer
13311# calc_p:
13312# (*) d0: temp
13313# ( ) d1: exponent
13314# (*) d3: PWRTxx table index
13315# ( ) a0: pointer to working copy of bcd
13316# (*) a1: PWRTxx pointer
13317# (*) fp1: power-of-ten accumulator
13318#
13319# Pwrten calculates the exponent factor in the selected rounding mode
13320# according to the following table:
13321#
13322# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
13323#
13324# ANY ANY RN RN
13325#
13326# + + RP RP
13327# - + RP RM
13328# + - RP RM
13329# - - RP RP
13330#
13331# + + RM RM
13332# - + RM RP
13333# + - RM RP
13334# - - RM RM
13335#
13336# + + RZ RM
13337# - + RZ RM
13338# + - RZ RP
13339# - - RZ RP
13340#
13341#
13342pwrten:
# Select the rounding mode for the power-of-ten computation from RTABLE,
# indexed by {FPCR[6:5],SM,SE}; load it into FPCR, pick the matching
# PTENxx table, then build fp1 = 10**abs(exp) by multiplying the table
# entries selected by the set bits of the exponent in d1.
13343 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
13344 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
13345 mov.l (%a0),%d4 # reload 1st bcd word to d4
13346 asl.l &2,%d2 # format d2 to be
13347 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
13348 add.l %d0,%d2 # in d2 as index into RTABLE
13349 lea.l RTABLE(%pc),%a1 # load rtable base
13350 mov.b (%a1,%d2),%d0 # load new rounding bits from table
13351 clr.l %d3 # clear d3 to force no exc and extended
13352 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
13353 fmov.l %d3,%fpcr # write new FPCR
13354 asr.l &1,%d0 # shift bit 0 of the new mode into carry
13355 bcc.b not_rp # bit 0 clear: not RP
13356 lea.l PTENRP(%pc),%a1 # it is RP
13357 bra.b calc_p # go to init section
13358not_rp:
13359 asr.l &1,%d0 # shift bit 1 of the new mode into carry
13360 bcc.b not_rm # bit 1 clear: not RM
13361 lea.l PTENRM(%pc),%a1 # it is RM
13362 bra.b calc_p # go to init section
13363not_rm:
13364 lea.l PTENRN(%pc),%a1 # it is RN
13365calc_p:
13366 mov.l %d1,%d0 # copy exp to d0;use d0
13367 bpl.b no_neg # if exp is negative,
13368 neg.l %d0 # invert it
13369 or.l &0x40000000,(%a0) # and set SE bit
13370no_neg:
13371 clr.l %d3 # table index
13372 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13373e_loop:
13374 asr.l &1,%d0 # shift next bit into carry
13375 bcc.b e_next # if zero, skip the mul
13376 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13377e_next:
13378 add.l &12,%d3 # advance d3 to the next 12-byte table entry
13379 tst.l %d0 # check if d0 is zero
13380 bne.b e_loop # not zero, continue shifting
13381#
13382#
13383# Check the sign of the adjusted exp and make the value in fp0 the
13384# same sign. If the exp was pos then multiply fp1*fp0;
13385# else divide fp0/fp1.
13386#
13387# Register Usage:
13388# pnorm:
13389# ( ) a0: pointer to working bcd value
13390# (*) fp0: mantissa accumulator
13391# ( ) fp1: scaling factor - 10**(abs(exp))
13392#
13393pnorm:
# Apply the power-of-ten scale factor in fp1 to the mantissa in fp0
# (divide if SE is set, multiply otherwise), report any inexact result as
# INEX1/AINEX in USER_FPSR, then unwind the stack and return.
13394 btst &30,(%a0) # test the sign of the exponent
13395 beq.b mul # if clear, go to multiply
13396div:
13397 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by 10**abs(exp)
13398 bra.b end_dec
13399mul:
13400 fmul.x %fp1,%fp0 # exp is positive, so multiply mant by 10**exp
13401#
13402#
13403# Clean up and return with result in fp0.
13404#
13405# If the final mul/div in decbin incurred an inex exception,
13406# it will be inex2, but will be reported as inex1 by get_op.
13407#
13408end_dec:
13409 fmov.l %fpsr,%d0 # get status register
13410 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
13411 beq.b no_exc # skip this if no exc
13412 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413no_exc:
13414 add.l &0x4,%sp # drop the exponent lword pushed by calc_e
13415 fmovm.x (%sp)+,&0x40 # restore fp1
13416 movm.l (%sp)+,&0x3c # restore d2-d5
13417 fmov.l &0x0,%fpcr # clear FPCR before returning
13418 fmov.l &0x0,%fpsr # clear FPSR before returning
13419 rts
13420
13421#########################################################################
13422# bindec(): Converts an input in extended precision format to bcd format#
13423# #
13424# INPUT *************************************************************** #
13425# a0 = pointer to the input extended precision value in memory. #
13426# the input may be either normalized, unnormalized, or #
13427# denormalized. #
13428# d0 = contains the k-factor sign-extended to 32-bits. #
13429# #
13430# OUTPUT ************************************************************** #
13431# FP_SCR0(a6) = bcd format result on the stack. #
13432# #
13433# ALGORITHM *********************************************************** #
13434# #
13435# A1. Set RM and size ext; Set SIGMA = sign of input. #
13436# The k-factor is saved for use in d7. Clear the #
13437# BINDEC_FLG for separating normalized/denormalized #
13438# input. If input is unnormalized or denormalized, #
13439# normalize it. #
13440# #
13441# A2. Set X = abs(input). #
13442# #
13443# A3. Compute ILOG. #
13444# ILOG is the log base 10 of the input value. It is #
13445# approximated by adding e + 0.f when the original #
13446# value is viewed as 2^^e * 1.f in extended precision. #
13447# This value is stored in d6. #
13448# #
13449# A4. Clr INEX bit. #
13450# The operation in A3 above may have set INEX2. #
13451# #
13452# A5. Set ICTR = 0; #
13453# ICTR is a flag used in A13. It must be set before the #
13454# loop entry A6. #
13455# #
13456# A6. Calculate LEN. #
13457# LEN is the number of digits to be displayed. The #
13458# k-factor can dictate either the total number of digits, #
13459# if it is a positive number, or the number of digits #
13460# after the decimal point which are to be included as #
13461# significant. See the 68882 manual for examples. #
13462# If LEN is computed to be greater than 17, set OPERR in #
13463# USER_FPSR. LEN is stored in d4. #
13464# #
13465# A7. Calculate SCALE. #
13466# SCALE is equal to 10^ISCALE, where ISCALE is the number #
13467# of decimal places needed to insure LEN integer digits #
13468# in the output before conversion to bcd. LAMBDA is the #
13469# sign of ISCALE, used in A9. Fp1 contains #
13470# 10^^(abs(ISCALE)) using a rounding mode which is a #
13471# function of the original rounding mode and the signs #
13472# of ISCALE and X. A table is given in the code. #
13473# #
13474# A8. Clr INEX; Force RZ. #
13475# The operation in A3 above may have set INEX2. #
13476# RZ mode is forced for the scaling operation to insure #
13477# only one rounding error. The grs bits are collected in #
13478# the INEX flag for use in A10. #
13479# #
13480# A9. Scale X -> Y. #
13481# The mantissa is scaled to the desired number of #
13482# significant digits. The excess digits are collected #
13483# in INEX2. #
13484# #
13485# A10. Or in INEX. #
13486# If INEX is set, round error occurred. This is #
13487# compensated for by 'or-ing' in the INEX2 flag to #
13488# the lsb of Y. #
13489# #
13490# A11. Restore original FPCR; set size ext. #
13491# Perform FINT operation in the user's rounding mode. #
13492# Keep the size to extended. #
13493# #
13494# A12. Calculate YINT = FINT(Y) according to user's rounding #
13495# mode. The FPSP routine sintd0 is used. The output #
13496# is in fp0. #
13497# #
13498# A13. Check for LEN digits. #
13499# If the int operation results in more than LEN digits, #
13500# or less than LEN -1 digits, adjust ILOG and repeat from #
13501# A6. This test occurs only on the first pass. If the #
13502# result is exactly 10^LEN, decrement ILOG and divide #
13503# the mantissa by 10. #
13504# #
13505# A14. Convert the mantissa to bcd. #
13506# The binstr routine is used to convert the LEN digit #
13507# mantissa to bcd in memory. The input to binstr is #
13508# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13509# such that the decimal point is to the left of bit 63. #
13510# The bcd digits are stored in the correct position in #
13511# the final string area in memory. #
13512# #
13513# A15. Convert the exponent to bcd. #
13514# As in A14 above, the exp is converted to bcd and the #
13515# digits are stored in the final string. #
13516# Test the length of the final exponent string. If the #
13517# length is 4, set operr. #
13518# #
13519# A16. Write sign bits to final string. #
13520# #
13521#########################################################################
13522
13523set BINDEC_FLG, EXC_TEMP # byte flag (aliases EXC_TEMP): non-zero when input is a denorm
13524
13525# Constants in extended precision
13526PLOG2:
# approximately log10(2) = 0.30103; multiplier used by A3 for non-negative values
13527 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13528PLOG2UP1:
# PLOG2 with the mantissa lsb incremented; used by A3 for negative values
13529 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13530
13531# Constants in single precision (first long of each entry; the rest is zero padding)
13532FONE:
# 1.0
13533 long 0x3F800000,0x00000000,0x00000000,0x00000000
13534FTWO:
# 2.0
13535 long 0x40000000,0x00000000,0x00000000,0x00000000
13536FTEN:
# 10.0
13537 long 0x41200000,0x00000000,0x00000000,0x00000000
13538F4933:
# 4933.0
13539 long 0x459A2800,0x00000000,0x00000000,0x00000000
13540
# Rounding mode lookup used by A7, indexed by {FPCR[6:5],LAMBDA,sign(X)}.
# Values are FPCR rounding-mode codes: 0 = RN, 2 = RM, 3 = RP.
13541RBDTBL:
13542 byte 0,0,0,0
13543 byte 3,3,2,2
13544 byte 3,2,2,3
13545 byte 2,3,3,2
13546
13547# Implementation Notes:
13548#
13549# The registers are used as follows:
13550#
13551# d0: scratch; LEN input to binstr
13552# d1: scratch
13553# d2: upper 32-bits of mantissa for binstr
13554# d3: scratch;lower 32-bits of mantissa for binstr
13555# d4: LEN
13556# d5: LAMBDA/ICTR
13557# d6: ILOG
13558# d7: k-factor
13559# a0: ptr for original operand/final result
13560# a1: scratch pointer
13561# a2: pointer to FP_X; abs(original value) in ext
13562# fp0: scratch
13563# fp1: scratch
13564# fp2: scratch
13565# F_SCR1:
13566# F_SCR2:
13567# L_SCR1:
13568# L_SCR2:
13569
13570 global bindec
13571bindec:
# Entry: a0 = pointer to the extended precision input, d0 = k-factor.
# Saves the working registers, sets the FPCR, records the first lword of
# the input in L_SCR2 and the k-factor in d7, and normalizes a DENORM input
# in place (setting BINDEC_FLG if the resulting exponent is <= 0).
13572 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13573 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13574
13575# A1. Set RM and size ext. Set SIGMA = sign input;
13576# The k-factor is saved for use in d7. Clear BINDEC_FLG for
13577# separating normalized/denormalized input. If the input
13578# is a denormalized number, set the BINDEC_FLG memory word
13579# to signal denorm. If the input is unnormalized, normalize
13580# the input and test for denormalized result.
13581#
13582 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13583 mov.l (%a0),L_SCR2(%a6) # save first lword (sign/exponent) for sign check
13584 mov.l %d0,%d7 # move k-factor to d7
13585
13586 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13587 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13588 bne.w A2_str # no; input is a NORM
13589
13590#
13591# Normalize the denorm
13592#
13593un_de_norm:
13594 mov.w (%a0),%d0 # d0 = sign/exponent word
13595 and.w &0x7fff,%d0 # strip sign bit, leaving the exponent
13596 mov.l 4(%a0),%d1 # d1 = upper 32 bits of mantissa
13597 mov.l 8(%a0),%d2 # d2 = lower 32 bits of mantissa
13598norm_loop:
13599 sub.w &1,%d0 # each left shift lowers the exponent by one
13600 lsl.l &1,%d2 # shift d1:d2 left one bit;
13601 roxl.l &1,%d1 # bit shifted out of d2 enters d1
13602 tst.l %d1 # is the msb of the mantissa set yet?
13603 bge.b norm_loop # no: keep shifting
13604#
13605# Test if the normalized input is denormalized
13606#
13607 tst.w %d0
13608 bgt.b pos_exp # if greater than zero, it is a norm
13609 st BINDEC_FLG(%a6) # set flag for denorm
13610pos_exp:
13611 and.w &0x7fff,%d0 # clear bit 15 so the stored word has sign = 0
13612 mov.w %d0,(%a0) # write normalized exponent back
13613 mov.l %d1,4(%a0) # write normalized mantissa (upper) back
13614 mov.l %d2,8(%a0) # write normalized mantissa (lower) back
13615
13616# A2. Set X = abs(input).
13617#
13618A2_str:
# Copy the 12-byte input to FP_SCR1 and clear its sign bit: FP_SCR1 = abs(X).
13619 mov.l (%a0),FP_SCR1(%a6) # copy sign/exponent lword to work space
13620 mov.l 4(%a0),FP_SCR1+4(%a6) # copy upper mantissa to work space
13621 mov.l 8(%a0),FP_SCR1+8(%a6) # copy lower mantissa to work space
13622 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
13623
13624# A3. Compute ILOG.
13625# ILOG is the log base 10 of the input value. It is approx-
13626# imated by adding e + 0.f when the original value is viewed
13627# as 2^^e * 1.f in extended precision. This value is stored
13628# in d6.
13629#
13630# Register usage:
13631# Input/Output
13632# d0: k-factor/exponent
13633# d2: x/x
13634# d3: x/x
13635# d4: x/x
13636# d5: x/x
13637# d6: x/ILOG
13638# d7: k-factor/Unchanged
13639# a0: ptr for original operand/final result
13640# a1: x/x
13641# a2: x/x
13642# fp0: x/float(ILOG)
13643# fp1: x/x
13644# fp2: x/x
13645# F_SCR1:x/x
13646# F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647# L_SCR1:x/x
13648# L_SCR2:first word of X packed/Unchanged
13649
13650 tst.b BINDEC_FLG(%a6) # check for denorm
13651 beq.b A3_cont # if clr, continue with norm
13652 mov.l &-4933,%d6 # force ILOG = -4933
13653 bra.b A4_str
13654A3_cont:
# Estimate ILOG: view abs(X) as 2^e * 1.f, form (e + 0.f), and multiply by
# PLOG2 (or PLOG2UP1 when the sum is negative); the integer result goes to d6.
13655 mov.w FP_SCR1(%a6),%d0 # move exp to d0
13656 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13657 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13658 sub.w &0x3fff,%d0 # strip off bias
13659 fadd.w %d0,%fp0 # add in exp
13660 fsub.s FONE(%pc),%fp0 # subtract off 1.0, leaving e + 0.f
13661 fbge.w pos_res # if pos, branch
13662 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
13663 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13664 bra.b A4_str # go move out ILOG
13665pos_res:
13666 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
13667 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13668
13669
13670# A4. Clr INEX bit.
13671# The operation in A3 above may have set INEX2.
13672
13673A4_str:
13674 fmov.l &0,%fpsr # zero all of fpsr - discards any INEX2 left by A3
13675
13676
13677# A5. Set ICTR = 0;
13678# ICTR is a flag used in A13. It must be set before the
13679# loop entry A6. The lower word of d5 is used for ICTR.
13680
13681 clr.w %d5 # clear ICTR (lower word of d5)
13682
13683# A6. Calculate LEN.
13684# LEN is the number of digits to be displayed. The k-factor
13685# can dictate either the total number of digits, if it is
13686# a positive number, or the number of digits after the
13687# original decimal point which are to be included as
13688# significant. See the 68882 manual for examples.
13689# If LEN is computed to be greater than 17, set OPERR in
13690# USER_FPSR. LEN is stored in d4.
13691#
13692# Register usage:
13693# Input/Output
13694# d0: exponent/Unchanged
13695# d2: x/x/scratch
13696# d3: x/x
13697# d4: exc picture/LEN
13698# d5: ICTR/Unchanged
13699# d6: ILOG/Unchanged
13700# d7: k-factor/Unchanged
13701# a0: ptr for original operand/final result
13702# a1: x/x
13703# a2: x/x
13704# fp0: float(ILOG)/Unchanged
13705# fp1: x/x
13706# fp2: x/x
13707# F_SCR1:x/x
13708# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709# L_SCR1:x/x
13710# L_SCR2:first word of X packed/Unchanged
13711
13712A6_str:
# Compute LEN in d4: k if k > 0, else ILOG + 1 - k; clamp it to 1..17.
# OPERR/AIOP is set only when a positive k asked for more than 17 digits.
13713 tst.l %d7 # branch on sign of k
13714 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13715 mov.l %d7,%d4 # if k > 0, LEN = k
13716 bra.b len_ck # skip to LEN check
13717k_neg:
13718 mov.l %d6,%d4 # first load ILOG to d4
13719 sub.l %d7,%d4 # subtract off k
13720 addq.l &1,%d4 # add in the 1
13721len_ck:
13722 tst.l %d4 # LEN check: branch on sign of LEN
13723 ble.b LEN_ng # if LEN <= 0, set LEN = 1
13724 cmp.l %d4,&17 # test if LEN > 17
13725 ble.b A7_str # LEN <= 17: nothing to clamp
13726 mov.l &17,%d4 # set max LEN = 17
13727 tst.l %d7 # OPERR applies only when k > 0
13728 ble.b A7_str # k <= 0: skip setting OPERR
13729 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13730 bra.b A7_str # finished here
13731LEN_ng:
13732 mov.l &1,%d4 # min LEN is 1
13733
13734
13735# A7. Calculate SCALE.
13736# SCALE is equal to 10^ISCALE, where ISCALE is the number
13737# of decimal places needed to insure LEN integer digits
13738# in the output before conversion to bcd. LAMBDA is the sign
13739# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13740# the rounding mode as given in the following table (see
13741# Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742# of opposite sign in bindec.sa from Coonen).
13743#
13744# Initial USE
13745# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13746# ----------------------------------------------
13747# RN 00 0 0 00/0 RN
13748# RN 00 0 1 00/0 RN
13749# RN 00 1 0 00/0 RN
13750# RN 00 1 1 00/0 RN
13751# RZ 01 0 0 11/3 RP
13752# RZ 01 0 1 11/3 RP
13753# RZ 01 1 0 10/2 RM
13754# RZ 01 1 1 10/2 RM
13755# RM 10 0 0 11/3 RP
13756# RM 10 0 1 10/2 RM
13757# RM 10 1 0 10/2 RM
13758# RM 10 1 1 11/3 RP
13759# RP 11 0 0 10/2 RM
13760# RP 11 0 1 11/3 RP
13761# RP 11 1 0 11/3 RP
13762# RP 11 1 1 10/2 RM
13763#
13764# Register usage:
13765# Input/Output
13766# d0: exponent/scratch - final is 0
13767# d2: x/0 or 24 for A9
13768# d3: x/scratch - offset ptr into PTENRM array
13769# d4: LEN/Unchanged
13770# d5: 0/ICTR:LAMBDA
13771# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772# d7: k-factor/Unchanged
13773# a0: ptr for original operand/final result
13774# a1: x/ptr to PTENRM array
13775# a2: x/x
13776# fp0: float(ILOG)/Unchanged
13777# fp1: x/10^ISCALE
13778# fp2: x/x
13779# F_SCR1:x/x
13780# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781# L_SCR1:x/x
13782# L_SCR2:first word of X packed/Unchanged
13783
13784A7_str:
# Compute ISCALE = ILOG + 1 - LEN, record its sign as LAMBDA in the lower
# word of d5, choose the rounding mode from RBDTBL, and build
# fp1 = 10**abs(ISCALE) from the matching PTENxx table.
13785 tst.l %d7 # test sign of k
13786 bgt.b k_pos # if k > 0, skip this
13787 cmp.l %d7,%d6 # test k - ILOG
13788 blt.b k_pos # if k < ILOG, skip this
13789 mov.l %d7,%d6 # k <= 0 and ILOG <= k: ILOG = k
13790k_pos:
13791 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
13792 addq.l &1,%d0 # add the 1
13793 sub.l %d4,%d0 # sub off LEN
13794 swap %d5 # move ICTR to the upper word of d5
13795 clr.w %d5 # lower word = LAMBDA, zero initially
13796 clr.w %d2 # d2 = 0 unless the very small case below applies
13797 tst.l %d0 # test sign of ISCALE
13798 bge.b iscale # if >= 0, LAMBDA stays 0
13799 addq.w &1,%d5 # if neg, set LAMBDA true
13800 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13801 bgt.b no_inf # if false, skip rest
13802 add.l &24,%d0 # add in 24 to iscale
13803 mov.l &24,%d2 # put 24 in d2 for A9
13804no_inf:
13805 neg.l %d0 # and take abs of ISCALE
13806iscale:
13807 fmov.s FONE(%pc),%fp1 # init fp1 to 1
13808 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13809 lsl.w &1,%d1 # put them in bits 2:1
13810 add.w %d5,%d1 # add in LAMBDA
13811 lsl.w &1,%d1 # put them in bits 3:1
13812 tst.l L_SCR2(%a6) # test sign of original x
13813 bge.b x_pos # if pos, don't set bit 0
13814 addq.l &1,%d1 # if neg, set bit 0
13815x_pos:
13816 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13817 mov.b (%a2,%d1),%d3 # load low byte of d3 with new rmode
# NOTE(review): mov.b leaves the upper 24 bits of d3 as they were; confirm
# they are zero here, since d3 << 4 is written to FPCR as a full lword.
13818 lsl.l &4,%d3 # move mode into FPCR[5:4] position
13819 fmov.l %d3,%fpcr # load bits into fpu
13820 lsr.l &4,%d3 # shift mode back down to bits 1:0
13821 tst.b %d3 # decode new rmode for pten table
13822 bne.b not_rn # non-zero: not RN
13823 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13824 bra.b rmode # exit decode
13825not_rn:
13826 lsr.b &1,%d3 # get lsb in carry
13827 bcc.b not_rp2 # if carry clear, it is RM
13828 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13829 bra.b rmode # exit decode
13830not_rp2:
13831 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13832rmode:
13833 clr.l %d3 # clr table index
13834e_loop2:
13835 lsr.l &1,%d0 # shift next bit into carry
13836 bcc.b e_next2 # if zero, skip the mul
13837 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13838e_next2:
13839 add.l &12,%d3 # advance d3 to the next 12-byte table entry
13840 tst.l %d0 # test if ISCALE is zero
13841 bne.b e_loop2 # if not, loop
13842
13843# A8. Clr INEX; Force RZ.
13844# The operation in A3 above may have set INEX2.
13845# RZ mode is forced for the scaling operation to insure
13846# only one rounding error. The grs bits are collected in
13847# the INEX flag for use in A10.
13848#
13849# Register usage:
13850# Input/Output
13851
13852 fmov.l &0,%fpsr # clear FPSR so INEX2 read in A10 comes from the A9 scaling
13853 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13854
13855# A9. Scale X -> Y.
13856# The mantissa is scaled to the desired number of significant
13857# digits. The excess digits are collected in INEX2. If mul,
13858# Check d2 for excess 10 exponential value. If not zero,
13859# the iscale value would have caused the pwrten calculation
13860# to overflow. Only a negative iscale can cause this, so
13861# multiply by 10^(d2), which is now only allowed to be 24,
13862# with a multiply by 10^8 and 10^16, which is exact since
13863# 10^24 is exact. If the input was denormalized, we must
13864# create a busy stack frame with the mul command and the
13865# two operands, and allow the fpu to complete the multiply.
13866#
13867# Register usage:
13868# Input/Output
13869# d0: FPCR with RZ mode/Unchanged
13870# d2: 0 or 24/unchanged
13871# d3: x/x
13872# d4: LEN/Unchanged
13873# d5: ICTR:LAMBDA
13874# d6: ILOG/Unchanged
13875# d7: k-factor/Unchanged
13876# a0: ptr for original operand/final result
13877# a1: ptr to PTENRM array/Unchanged
13878# a2: x/x
13879# fp0: float(ILOG)/X adjusted for SCALE (Y)
13880# fp1: 10^ISCALE/Unchanged
13881# fp2: x/x
13882# F_SCR1:x/x
13883# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884# L_SCR1:x/x
13885# L_SCR2:first word of X packed/Unchanged
13886
13887A9_str:
# Scale abs(X) into fp0: divide by fp1 when LAMBDA = 0, multiply when
# LAMBDA = 1. A flagged denorm input takes a separate path that combines
# the exponents by hand and multiplies the mantissas with exponent 0x3fff.
13888 fmov.x (%a0),%fp0 # load X from memory
13889 fabs.x %fp0 # use abs(X)
13890 tst.w %d5 # LAMBDA is in lower word of d5
13891 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13892 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13893 bra.w A10_st # branch to A10
13894
13895sc_mul:
13896 tst.b BINDEC_FLG(%a6) # check for denorm
13897 beq.w A9_norm # if norm, continue with mul
13898
13899# for DENORM, we must calculate:
13900# fp0 = input_op * 10^ISCALE * 10^24
13901# since the input operand is a DENORM, we can't multiply it directly.
13902# so, we do the multiplication of the exponents and mantissas separately.
13903# in this way, we avoid underflow on intermediate stages of the
13904# multiplication and guarantee a result without exception.
13905 fmovm.x &0x2,-(%sp) # save 10^ISCALE (fp1) to stack
13906
13907 mov.w (%sp),%d3 # grab exponent word of 10^ISCALE
13908 andi.w &0x7fff,%d3 # clear sign
13909 ori.w &0x8000,(%a0) # make DENORM exp negative
13910 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13911 subi.w &0x3fff,%d3 # subtract BIAS
13912 add.w 36(%a1),%d3 # add exponent word of table entry 3 (10^8)
13913 subi.w &0x3fff,%d3 # subtract BIAS
13914 add.w 48(%a1),%d3 # add exponent word of table entry 4 (10^16)
13915 subi.w &0x3fff,%d3 # subtract BIAS
13916
13917 bmi.w sc_mul_err # if result is DENORM, punt!!!
13918
13919 andi.w &0x8000,(%sp) # keep sign
13920 or.w %d3,(%sp) # insert new exponent
13921 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13922 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13923 mov.l 0x4(%a0),-(%sp)
13924 mov.l &0x3fff0000,-(%sp) # force exp to zero
13925 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13926 fmul.x (%sp)+,%fp0 # multiply by 10^ISCALE carrying the combined exponent
13927
13928# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13929# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13930 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13931 mov.l 36+4(%a1),-(%sp)
13932 mov.l &0x3fff0000,-(%sp) # force exp to zero
13933 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13934 mov.l 48+4(%a1),-(%sp)
13935 mov.l &0x3fff0000,-(%sp)# force exp to zero
13936 fmul.x (%sp)+,%fp0 # pop and multiply by the 10^16 mantissa (pushed last)
13937 fmul.x (%sp)+,%fp0 # pop and multiply by the 10^8 mantissa
13938 bra.b A10_st
13939
13940sc_mul_err:
13941 bra.b sc_mul_err # branches to itself: execution spins here forever
13942
13943A9_norm:
13944 tst.w %d2 # test for small exp case
13945 beq.b A9_con # if zero, continue as normal
13946 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13947 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13948A9_con:
13949 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13950
13951# A10. Or in INEX.
13952# If INEX is set, round error occurred. This is compensated
13953# for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954#
13955# Register usage:
13956# Input/Output
13957# d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958# d2: x/x
13959# d3: x/x
13960# d4: LEN/Unchanged
13961# d5: ICTR:LAMBDA
13962# d6: ILOG/Unchanged
13963# d7: k-factor/Unchanged
13964# a0: ptr for original operand/final result
13965# a1: ptr to PTENxx array/Unchanged
13966# a2: x/ptr to FP_SCR1(a6)
13967# fp0: Y/Y with lsb adjusted
13968# fp1: 10^ISCALE/Unchanged
13969# fp2: x/x
13970
13971A10_st:
# If the scaling in A9 was inexact, force the lsb of Y's mantissa to 1.
13972 fmov.l %fpsr,%d0 # get FPSR
13973 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13974 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13975 btst &9,%d0 # check if INEX2 set
13976 beq.b A11_st # if clear, skip rest
13977 or.l &1,8(%a2) # or in 1 to lsb of mantissa
13978 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13979
13980
13981# A11. Restore original FPCR; set size ext.
13982# Perform FINT operation in the user's rounding mode. Keep
13983# the size to extended. The sintdo entry point in the sint
13984# routine expects the FPCR value to be in USER_FPCR for
13985# mode and precision. The original FPCR is saved in L_SCR1.
13986
13987A11_st:
13988 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save user's FPCR in L_SCR1 for later
13989 and.l &0x00000030,USER_FPCR(%a6) # keep only rounding mode bits: size ext,
13990# ;block exceptions
13991
13992
13993# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994# FINT is issued inline with fint.x (the call to the FPSP routine sintdo is commented out below). The output is in fp0.
13995#
13996# Register usage:
13997# Input/Output
13998# d0: FPSR with AINEX cleared/FPCR with size set to ext
13999# d2: x/x/scratch
14000# d3: x/x
14001# d4: LEN/Unchanged
14002# d5: ICTR:LAMBDA/Unchanged
14003# d6: ILOG/Unchanged
14004# d7: k-factor/Unchanged
14005# a0: ptr for original operand/src ptr for sintdo
14006# a1: ptr to PTENxx array/Unchanged
14007# a2: ptr to FP_SCR1(a6)/Unchanged
14008# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009# fp0: Y/YINT
14010# fp1: 10^ISCALE/Unchanged
14011# fp2: x/x
14012# F_SCR1:x/x
14013# F_SCR2:Y adjusted for inex/Y with original exponent
14014# L_SCR1:x/original USER_FPCR
14015# L_SCR2:first word of X packed/Unchanged
14016
14017A12_st:
14018 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
14019 mov.l L_SCR1(%a6),-(%sp)
14020 mov.l L_SCR2(%a6),-(%sp)
14021
14022 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14023 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14024 tst.l L_SCR2(%a6) # test sign of original operand
14025 bge.b do_fint12 # if pos, use Y
14026 or.l &0x80000000,(%a0) # if neg, use -Y
14027do_fint12:
14028 mov.l USER_FPSR(%a6),-(%sp)
14029# bsr sintdo # sint routine returns int in fp0
14030
14031 fmov.l USER_FPCR(%a6),%fpcr
14032 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14033## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14034## andi.l &0x00000030,%d0
14035## fmov.l %d0,%fpcr
14036 fint.x FP_SCR1(%a6),%fp0 # do fint()
14037 fmov.l %fpsr,%d0
14038 or.w %d0,FPSR_EXCEPT(%a6)
14039## fmov.l &0x0,%fpcr
14040## fmov.l %fpsr,%d0 # don't keep ccodes
14041## or.w %d0,FPSR_EXCEPT(%a6)
14042
14043 mov.b (%sp),USER_FPSR(%a6)
14044 add.l &4,%sp
14045
14046 mov.l (%sp)+,L_SCR2(%a6)
14047 mov.l (%sp)+,L_SCR1(%a6)
14048 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14049
14050 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14051 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14052
14053# A13. Check for LEN digits.
14054# If the int operation results in more than LEN digits,
14055# or less than LEN -1 digits, adjust ILOG and repeat from
14056# A6. This test occurs only on the first pass. If the
14057# result is exactly 10^LEN, decrement ILOG and divide
14058# the mantissa by 10. The calculation of 10^LEN cannot
14059# be inexact, since all powers of ten up to 10^27 are exact
14060# in extended precision, so the use of a previous power-of-ten
14061# table will introduce no error.
14062#
14063#
14064# Register usage:
14065# Input/Output
14066# d0: FPCR with size set to ext/scratch final = 0
14067# d2: x/x
14068# d3: x/scratch final = x
14069# d4: LEN/LEN adjusted
14070# d5: ICTR:LAMBDA/LAMBDA:ICTR
14071# d6: ILOG/ILOG adjusted
14072# d7: k-factor/Unchanged
14073# a0: pointer into memory for packed bcd string formation
14074# a1: ptr to PTENxx array/Unchanged
14075# a2: ptr to FP_SCR1(a6)/Unchanged
14076# fp0: int portion of Y/abs(YINT) adjusted
14077# fp1: 10^ISCALE/Unchanged
14078# fp2: x/10^LEN
14079# F_SCR1:x/x
14080# F_SCR2:Y with original exponent/Unchanged
14081# L_SCR1:original USER_FPCR/Unchanged
14082# L_SCR2:first word of X packed/Unchanged
14083
14084A13_st:
14085 swap %d5 # put ICTR in lower word of d5
14086 tst.w %d5 # check if ICTR = 0
14087 bne not_zr # if non-zero, go to second test
14088#
14089# Compute 10^(LEN-1)
14090#
14091 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14092 mov.l %d4,%d0 # put LEN in d0
14093 subq.l &1,%d0 # d0 = LEN -1
14094 clr.l %d3 # clr table index
14095l_loop:
14096 lsr.l &1,%d0 # shift next bit into carry
14097 bcc.b l_next # if zero, skip the mul
14098 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14099l_next:
14100 add.l &12,%d3 # inc d3 to next pwrten table entry
14101 tst.l %d0 # test if LEN is zero
14102 bne.b l_loop # if not, loop
14103#
14104# 10^(LEN-1) is computed here; test_2 multiplies it up to 10^LEN for A14. If the input was
14105# denormalized, only the comparison against 10^LEN (YINT >= 10^LEN, at test_2) is performed.
14106#
14107 tst.b BINDEC_FLG(%a6) # check if input was norm
14108 beq.b A13_con # if norm, continue with checking
14109 fabs.x %fp0 # take abs of YINT
14110 bra test_2
14111#
14112# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113#
14114A13_con:
14115 fabs.x %fp0 # take abs of YINT
14116 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14117 fbge.w test_2 # if greater, do next test
14118 subq.l &1,%d6 # subtract 1 from ILOG
14119 mov.w &1,%d5 # set ICTR
14120 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14121 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14122 bra.w A6_str # return to A6 and recompute YINT
14123test_2:
14124 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14125 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14126 fblt.w A14_st # if less, all is ok, go to A14
14127 fbgt.w fix_ex # if greater, fix and redo
14128 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14129 addq.l &1,%d6 # and inc ILOG
14130 bra.b A14_st # and continue elsewhere
14131fix_ex:
14132 addq.l &1,%d6 # increment ILOG by 1
14133 mov.w &1,%d5 # set ICTR
14134 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14135 bra.w A6_str # return to A6 and recompute YINT
14136#
14137# Since ICTR <> 0, we have already been through one adjustment,
14138# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139# 10^LEN is again computed using whatever table is in a1 since the
14140# value calculated cannot be inexact.
14141#
14142not_zr:
14143 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14144 mov.l %d4,%d0 # put LEN in d0
14145 clr.l %d3 # clr table index
14146z_loop:
14147 lsr.l &1,%d0 # shift next bit into carry
14148 bcc.b z_next # if zero, skip the mul
14149 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14150z_next:
14151 add.l &12,%d3 # inc d3 to next pwrten table entry
14152 tst.l %d0 # test if LEN is zero
14153 bne.b z_loop # if not, loop
14154 fabs.x %fp0 # get abs(YINT)
14155 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14156 fbneq.w A14_st # if not, skip this
14157 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14158 addq.l &1,%d6 # and inc ILOG by 1
14159 addq.l &1,%d4 # and inc LEN
14160 fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN
14161
14162# A14. Convert the mantissa to bcd.
14163# The binstr routine is used to convert the LEN digit
14164# mantissa to bcd in memory. The input to binstr is
14165# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166# such that the decimal point is to the left of bit 63.
14167# The bcd digits are stored in the correct position in
14168# the final string area in memory.
14169#
14170#
14171# Register usage:
14172# Input/Output
14173# d0: x/LEN call to binstr - final is 0
14174# d1: x/0
14175# d2: x/ms 32-bits of mant of abs(YINT)
14176# d3: x/ls 32-bits of mant of abs(YINT)
14177# d4: LEN/Unchanged
14178# d5: ICTR:LAMBDA/LAMBDA:ICTR
14179# d6: ILOG
14180# d7: k-factor/Unchanged
14181# a0: pointer into memory for packed bcd string formation
14182# /ptr to first mantissa byte in result string
14183# a1: ptr to PTENxx array/Unchanged
14184# a2: ptr to FP_SCR1(a6)/Unchanged
14185# fp0: int portion of Y/abs(YINT) adjusted
14186# fp1: 10^ISCALE/Unchanged
14187# fp2: 10^LEN/Unchanged
14188# F_SCR1:x/Work area for final result
14189# F_SCR2:Y with original exponent/Unchanged
14190# L_SCR1:original USER_FPCR/Unchanged
14191# L_SCR2:first word of X packed/Unchanged
14192
14193A14_st:
14194 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14195 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14196 lea.l FP_SCR0(%a6),%a0
14197 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14198 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14199 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14200 clr.l 4(%a0) # zero word 2 of FP_RES
14201 clr.l 8(%a0) # zero word 3 of FP_RES
14202 mov.l (%a0),%d0 # move exponent to d0
14203 swap %d0 # put exponent in lower word
14204 beq.b no_sft # if zero, don't shift
14205 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14206 tst.l %d0 # check if > 1
14207 bgt.b no_sft # if so, don't shift
14208 neg.l %d0 # make exp positive
14209m_loop:
14210 lsr.l &1,%d2 # shift d2:d3 right, add 0s
14211 roxr.l &1,%d3 # the number of places
14212 dbf.w %d0,m_loop # given in d0
14213no_sft:
14214 tst.l %d2 # check for mantissa of zero
14215 bne.b no_zr # if not, go on
14216 tst.l %d3 # continue zero check
14217 beq.b zer_m # if zero, go directly to binstr
14218no_zr:
14219 clr.l %d1 # put zero in d1 for addx
14220 add.l &0x00000080,%d3 # inc at bit 7
14221 addx.l %d1,%d2 # continue inc
14222 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14223zer_m:
14224 mov.l %d4,%d0 # put LEN in d0 for binstr call
14225 addq.l &3,%a0 # a0 points to M16 byte in result
14226 bsr binstr # call binstr to convert mant
14227
14228
14229# A15. Convert the exponent to bcd.
14230# As in A14 above, the exp is converted to bcd and the
14231# digits are stored in the final string.
14232#
14233# Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234#
14235# 32 16 15 0
14236# -----------------------------------------
14237# | 0 | e3 | e2 | e1 | e4 | X | X | X |
14238# -----------------------------------------
14239#
14240# And are moved into their proper places in FP_SCR0. If digit e4
14241# is non-zero, OPERR is signaled. In all cases, all 4 digits are
14242# written as specified in the 881/882 manual for packed decimal.
14243#
14244# Register usage:
14245# Input/Output
14246# d0: x/LEN call to binstr - final is 0
14247# d1: x/scratch (0);shift count for final exponent packing
14248# d2: x/ms 32-bits of exp fraction/scratch
14249# d3: x/ls 32-bits of exp fraction
14250# d4: LEN/Unchanged
14251# d5: ICTR:LAMBDA/LAMBDA:ICTR
14252# d6: ILOG
14253# d7: k-factor/Unchanged
14254# a0: ptr to result string/ptr to L_SCR1(a6)
14255# a1: ptr to PTENxx array/Unchanged
14256# a2: ptr to FP_SCR1(a6)/Unchanged
14257# fp0: abs(YINT) adjusted/float(ILOG)
14258# fp1: 10^ISCALE/Unchanged
14259# fp2: 10^LEN/Unchanged
14260# F_SCR1:Work area for final result/BCD result
14261# F_SCR2:Y with original exponent/ILOG/10^4
14262# L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263# L_SCR2:first word of X packed/Unchanged
14264
14265A15_st:
14266 tst.b BINDEC_FLG(%a6) # check for denorm
14267 beq.b not_denorm
14268 ftest.x %fp0 # test for zero
14269 fbeq.w den_zero # if zero, use k-factor or 4933
14270 fmov.l %d6,%fp0 # float ILOG
14271 fabs.x %fp0 # get abs of ILOG
14272 bra.b convrt
14273den_zero:
14274 tst.l %d7 # check sign of the k-factor
14275 blt.b use_ilog # if negative, use ILOG
14276 fmov.s F4933(%pc),%fp0 # force exponent to 4933
14277 bra.b convrt # do it
14278use_ilog:
14279 fmov.l %d6,%fp0 # float ILOG
14280 fabs.x %fp0 # get abs of ILOG
14281 bra.b convrt
14282not_denorm:
14283 ftest.x %fp0 # test for zero
14284 fbneq.w not_zero # if zero, force exponent
14285 fmov.s FONE(%pc),%fp0 # force exponent to 1
14286 bra.b convrt # do it
14287not_zero:
14288 fmov.l %d6,%fp0 # float ILOG
14289 fabs.x %fp0 # get abs of ILOG
14290convrt:
14291 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14292 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14293 mov.l 4(%a2),%d2 # move word 2 to d2
14294 mov.l 8(%a2),%d3 # move word 3 to d3
14295 mov.w (%a2),%d0 # move exp to d0
14296 beq.b x_loop_fin # if zero, skip the shift
14297 sub.w &0x3ffd,%d0 # subtract off bias
14298 neg.w %d0 # make exp positive
14299x_loop:
14300 lsr.l &1,%d2 # shift d2:d3 right
14301 roxr.l &1,%d3 # the number of places
14302 dbf.w %d0,x_loop # given in d0
14303x_loop_fin:
14304 clr.l %d1 # put zero in d1 for addx
14305 add.l &0x00000080,%d3 # inc at bit 6
14306 addx.l %d1,%d2 # continue inc
14307 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14308 mov.l &4,%d0 # put 4 in d0 for binstr call
14309 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14310 bsr binstr # call binstr to convert exp
14311 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14312 mov.l &12,%d1 # use d1 for shift count
14313 lsr.l %d1,%d0 # shift d0 right by 12
14314 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14315 lsr.l %d1,%d0 # shift d0 right by 12
14316 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14317 tst.b %d0 # check if e4 is zero
14318 beq.b A16_st # if zero, skip rest
14319 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14320
14321
14322# A16. Write sign bits to final string.
14323# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324#
14325# Register usage:
14326# Input/Output
14327# d0: x/scratch - final is x
14328# d2: x/x
14329# d3: x/x
14330# d4: LEN/Unchanged
14331# d5: ICTR:LAMBDA/LAMBDA:ICTR
14332# d6: ILOG/ILOG adjusted
14333# d7: k-factor/Unchanged
14334# a0: ptr to L_SCR1(a6)/Unchanged
14335# a1: ptr to PTENxx array/Unchanged
14336# a2: ptr to FP_SCR1(a6)/Unchanged
14337# fp0: float(ILOG)/Unchanged
14338# fp1: 10^ISCALE/Unchanged
14339# fp2: 10^LEN/Unchanged
14340# F_SCR1:BCD result with correct signs
14341# F_SCR2:ILOG/10^4
14342# L_SCR1:Exponent digits on return from binstr
14343# L_SCR2:first word of X packed/Unchanged
14344
14345A16_st:
14346 clr.l %d0 # clr d0 for collection of signs
14347 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
14348 tst.l L_SCR2(%a6) # check sign of original mantissa
14349 bge.b mant_p # if pos, don't set SM
14350 mov.l &2,%d0 # move 2 in to d0 for SM
14351mant_p:
14352 tst.l %d6 # check sign of ILOG
14353 bge.b wr_sgn # if pos, don't set SE
14354 addq.l &1,%d0 # set bit 0 in d0 for SE
14355wr_sgn:
14356 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14357
14358# Clean up and restore all registers used.
14359
14360 fmov.l &0,%fpsr # clear possible inex2/ainex bits
14361 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
14362 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
14363 rts
14364
# PTENRN: extended-precision powers of ten, 10^(2^i) for i = 0..12.
# Each entry is three longwords (12 bytes): the biased exponent in the upper
# word of the first longword, then the 64-bit mantissa, so 10^(2^i) sits at
# byte offset 12*i (e.g. 24 -> 10^4, 36 -> 10^8, 48 -> 10^16).
# Rows 10^1..10^16 are identical in PTENRN, PTENRP and PTENRM; from 10^32 up
# each row here equals either the PTENRP or the PTENRM row, which differ from
# one another by one unit in the last mantissa place.
# NOTE(review): "RN" presumably marks the round-to-nearest variant - confirm
# against the code that loads the table pointer into a1.
14365 global PTENRN
14366PTENRN:
14367 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14368 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14369 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14370 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14371 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14372 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14373 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14374 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14375 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14376 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14377 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14378 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14379 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14380
# PTENRP: extended-precision powers of ten, 10^(2^i) for i = 0..12, in the
# same 12-byte-per-entry layout as PTENRN/PTENRM (exponent word, then 64-bit
# mantissa; entry i at byte offset 12*i).
# Rows 10^1..10^16 match the other two tables exactly; every row from 10^32
# up is one unit in the last mantissa place above the matching PTENRM row.
# NOTE(review): "RP" presumably marks the round-toward-plus-infinity variant -
# confirm against the code that loads the table pointer into a1.
14381 global PTENRP
14382PTENRP:
14383 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14384 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14385 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14386 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14387 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14388 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14389 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
14390 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14391 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14392 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14393 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
14394 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14395 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14396
# PTENRM: extended-precision powers of ten, 10^(2^i) for i = 0..12, in the
# same 12-byte-per-entry layout as PTENRN/PTENRP (exponent word, then 64-bit
# mantissa; entry i at byte offset 12*i).
# Rows 10^1..10^16 match the other two tables exactly; every row from 10^32
# up is one unit in the last mantissa place below the matching PTENRP row.
# NOTE(review): "RM" presumably marks the round-toward-minus-infinity variant -
# confirm against the code that loads the table pointer into a1.
14397 global PTENRM
14398PTENRM:
14399 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14400 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14401 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14402 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14403 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14404 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
14405 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14406 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
14407 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
14408 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
14409 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14410 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
14411 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
14412
14413#########################################################################
14414# binstr(): Converts a 64-bit binary integer to bcd. #
14415# #
14416# INPUT *************************************************************** #
14417# d2:d3 = 64-bit binary integer #
14418# d0 = desired length (LEN) #
14419# a0 = pointer to start in memory for bcd characters #
14420# (This pointer must point to byte 4 of the first #
14421# lword of the packed decimal memory string.) #
14422# #
14423# OUTPUT ************************************************************** #
14424# memory at incoming a0 = LEN bcd digits representing the 64-bit integer; a0 is returned advanced past the last byte written. #
14425# #
14426# ALGORITHM *********************************************************** #
14427# The 64-bit binary is assumed to have a decimal point before #
14428# bit 63. The fraction is multiplied by 10 using a mul by 2 #
14429# shift and a mul by 8 shift. The bits shifted out of the #
14430# msb form a decimal digit. This process is iterated until #
14431# LEN digits are formed. #
14432# #
14433# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14434# digit formed will be assumed the least significant. This is #
14435# to force the first byte formed to have a 0 in the upper 4 bits. #
14436# #
14437# A2. Beginning of the loop: #
14438# Copy the fraction in d2:d3 to d4:d5. #
14439# #
14440# A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14441# extracts and shifts. The three msbs from d2 will go into d1. #
14442# #
14443# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14444# will be collected by the carry. #
14445# #
14446# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14447# into d2:d3. D1 will contain the bcd digit formed. #
14448# #
14449# A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14450# zero, it is the ls digit. Put the digit in its place in the #
14451# upper word of d7. If it is the ls digit, write the byte #
14452# from d7 to memory. #
14453# #
14454# A7. Decrement d0 (LEN counter) and repeat the loop until exhausted. #
14455# #
14456#########################################################################
14457
14458# Implementation Notes:
14459#
14460# The registers are used as follows:
14461#
14462# d0: LEN counter
14463# d1: temp used to form the digit
14464# d2: upper 32-bits of fraction for mul by 8
14465# d3: lower 32-bits of fraction for mul by 8
14466# d4: upper 32-bits of fraction for mul by 2
14467# d5: lower 32-bits of fraction for mul by 2
14468# d6: temp for bit-field extracts
14469# d7: byte digit formation word;digit count {0,1}
14470# a0: pointer into memory for packed bcd string formation
14471#
14472
# binstr: turn the 64-bit fraction in d2:d3 into d0 (LEN) packed-bcd digits,
# stored two per byte through (a0)+.
#   In:  d0 = LEN, d2:d3 = fraction, a0 = first destination byte
#   Out: digits in memory; a0 left advanced past the last byte stored
#   d0-d7 are pushed on entry and popped on exit, so callers see them unchanged.
# Each pass multiplies the fraction by 10 as (x*8)+(x*2); the bits that fall
# off the top accumulate in d1 and form one decimal digit.
14473 global binstr
14474binstr:
14475 movm.l &0xff00,-(%sp) # {%d0-%d7}
14476
14477#
14478# A1: Init d7
14479#
14480 mov.l &1,%d7 # low word 1 = next digit completes a byte; upper word 0 = its high nibble
14481 subq.l &1,%d0 # dbf runs count+1 passes, so loop on LEN-1
14482#
14483# A2. Copy d2:d3 to d4:d5. Start loop.
14484#
14485loop:
14486 mov.l %d2,%d4 # copy the fraction before muls
14487 mov.l %d3,%d5 # to d4:d5
14488#
14489# A3. Multiply d2:d3 by 8; extract msbs into d1.
14490#
14491 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
14492 asl.l &3,%d2 # shift d2 left by 3 places
14493 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
14494 asl.l &3,%d3 # shift d3 left by 3 places
14495 or.l %d6,%d2 # or in msbs from d3 into d2
14496#
14497# A4. Multiply d4:d5 by 2; add carry out to d1.
14498#
14499 asl.l &1,%d5 # mul d5 by 2
14500 roxl.l &1,%d4 # mul d4 by 2
14501 swap %d6 # low word of d6 becomes 0 (its upper word was 0 after bfextu)
14502 addx.w %d6,%d1 # d1 += 0 + X, i.e. the bit shifted out by the mul by 2
14503#
14504# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
14505#
14506 add.l %d5,%d3 # add lower 32 bits
14507 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508 addx.l %d4,%d2 # add with extend upper 32 bits
14509 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510 addx.w %d6,%d1 # d1 += 0 + X, i.e. the carry out of the 64-bit add
14511 swap %d6 # swap d6 back; it is reloaded by bfextu on the next pass
14512#
14513# A6. Test d7 and branch.
14514#
14515 tst.w %d7 # flag 0: this digit starts a new byte
14516 beq.b first_d # flag non-zero: fall through, form byte & write
14517sec_d:
14518 swap %d7 # bring first digit to word d7b
14519 asl.w &4,%d7 # first digit in upper 4 bits d7b
14520 add.w %d1,%d7 # add in ls digit to d7b
14521 mov.b %d7,(%a0)+ # store d7b byte in memory
14522 swap %d7 # bring the flag word (d7a) back to the low word
14523 clr.w %d7 # set d7a to signal no digits done
14524 dbf.w %d0,loop # do loop some more!
14525 bra.b end_bstr # finished, so exit
14526first_d:
14527 swap %d7 # put digit word in d7b
14528 mov.w %d1,%d7 # put new digit in d7b
14529 swap %d7 # digit now parked in upper word; flag word (d7a) back in low word
14530 addq.w &1,%d7 # set d7a to signal first digit done
14531 dbf.w %d0,loop # do loop some more!
14532 swap %d7 # LEN exhausted on a high-nibble digit: fetch it
14533 lsl.w &4,%d7 # move it to upper 4 bits (low nibble is 0)
14534 mov.b %d7,(%a0)+ # store it in memory string
14535#
14536# Clean up and return; the digits have been written to memory via a0.
14537#
14538end_bstr:
14539 movm.l (%sp)+,&0xff # {%d0-%d7}
14540 rts
14541
14542#########################################################################
14543# XDEF **************************************************************** #
14544# facc_in_b(): dmem_read_byte failed #
14545# facc_in_w(): dmem_read_word failed #
14546# facc_in_l(): dmem_read_long failed #
14547# facc_in_d(): dmem_read of dbl prec failed #
14548# facc_in_x(): dmem_read of ext prec failed #
14549# #
14550# facc_out_b(): dmem_write_byte failed #
14551# facc_out_w(): dmem_write_word failed #
14552# facc_out_l(): dmem_write_long failed #
14553# facc_out_d(): dmem_write of dbl prec failed #
14554# facc_out_x(): dmem_write of ext prec failed #
14555# #
14556# XREF **************************************************************** #
14557# _real_access() - exit through access error handler #
14558# #
14559# INPUT *************************************************************** #
14560# None #
14561# #
14562# OUTPUT ************************************************************** #
14563# None #
14564# #
14565# ALGORITHM *********************************************************** #
14566# Flow jumps here when an FP data fetch call gets an error #
14567# result. This means the operating system wants an access error frame #
14568# made out of the current exception stack frame. #
14569# So, we first call restore() which makes sure that any updated #
14570# -(an)+ register gets returned to its pre-exception value and then #
14571# we change the stack to an access error stack frame. #
14572# #
14573#########################################################################
14574
# facc_in_b: a byte-sized FP data read (dmem_read_byte) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14575facc_in_b:
14576 movq.l &0x1,%d0 # one byte
14577 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14578
14579 mov.w &0x0121,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14580 bra.w facc_finish
14581
# facc_in_w: a word-sized FP data read (dmem_read_word) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14582facc_in_w:
14583 movq.l &0x2,%d0 # two bytes
14584 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14585
14586 mov.w &0x0141,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14587 bra.b facc_finish
14588
# facc_in_l: a longword-sized FP data read (dmem_read_long) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14589facc_in_l:
14590 movq.l &0x4,%d0 # four bytes
14591 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14592
14593 mov.w &0x0101,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14594 bra.b facc_finish
14595
# facc_in_d: a double-precision FP data read failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14596facc_in_d:
14597 movq.l &0x8,%d0 # eight bytes
14598 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14599
14600 mov.w &0x0161,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14601 bra.b facc_finish
14602
# facc_in_x: an extended-precision FP data read failed.
# Same flow as the other facc_in_* stubs; only the size handed to restore()
# differs from facc_in_d - the FSLW word written is the same 0x0161.
14603facc_in_x:
14604 movq.l &0xc,%d0 # twelve bytes
14605 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14606
14607 mov.w &0x0161,EXC_VOFF(%a6) # FSLW word (same value as the double-precision read case)
14608 bra.b facc_finish
14609
14610################################################################
14611
# facc_out_b: a byte-sized FP data write (dmem_write_byte) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14612facc_out_b:
14613 movq.l &0x1,%d0 # one byte
14614 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14615
14616 mov.w &0x00a1,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14617 bra.b facc_finish
14618
# facc_out_w: a word-sized FP data write (dmem_write_word) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14619facc_out_w:
14620 movq.l &0x2,%d0 # two bytes
14621 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14622
14623 mov.w &0x00c1,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14624 bra.b facc_finish
14625
# facc_out_l: a longword-sized FP data write (dmem_write_long) failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14626facc_out_l:
14627 movq.l &0x4,%d0 # four bytes
14628 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14629
14630 mov.w &0x0081,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14631 bra.b facc_finish
14632
# facc_out_d: a double-precision FP data write failed.
# Passes the operand size to restore() in d0, records the FSLW word for this
# access in EXC_VOFF, then joins facc_finish to build the access error frame.
14633facc_out_d:
14634 movq.l &0x8,%d0 # eight bytes
14635 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14636
14637 mov.w &0x00e1,EXC_VOFF(%a6) # FSLW word; facc_finish copies it into the new frame
14638 bra.b facc_finish
14639
# facc_out_x: an extended-precision FP data write failed.
# Same flow as the other facc_out_* stubs, with the same FSLW word as
# facc_out_d. There is no branch at the end: execution falls through into
# facc_finish, which follows immediately.
14640facc_out_x:
14641 mov.l &0xc,%d0 # twelve bytes
14642 bsr.w restore # back out (An)+/-(An); must run before EXC_VOFF is overwritten (ri_a7 reads it)
14643
14644 mov.w &0x00e1,EXC_VOFF(%a6) # FSLW word (same value as the double-precision write case)
14645
14646# here's where we actually create the access error frame from the
14647# current exception stack frame.
# On entry EXC_VOFF(%a6) holds the FSLW word chosen by the facc_* stub.
# The frame is grown by one longword and reshuffled in place so that it ends
# up as: 0x0 SR, 0x2 PC, 0x6 voff, 0x8 EA, 0xc FSLW.
# NOTE(review): this assumes the incoming frame is SR / PC / voff / EA, as the
# per-line comments imply - confirm against the frame definitions earlier in
# the file.
14648facc_finish:
14649 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # frame PC <- saved FPIAR
14650
14651 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
14652 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 (including any a0/a1 fix made by restore)
14654
14655 unlk %a6
14656
14657 mov.l (%sp),-(%sp) # push a copy of SR, hi(PC): frame grows by 4 bytes
14658 mov.l 0x8(%sp),0x4(%sp) # store lo(PC); the old voff word rides along to 0x6
14659 mov.l 0xc(%sp),0x8(%sp) # store EA
14660 mov.l &0x00000001,0xc(%sp) # store FSLW (low bit set, upper word filled in next)
14661 mov.w 0x6(%sp),0xc(%sp) # upper word of FSLW <- word the stub left in EXC_VOFF
14662 mov.w &0x4008,0x6(%sp) # store voff; presumably format 4 / vector offset 0x008 - TODO confirm
14663
14664 btst &0x5,(%sp) # bit 5 of the SR high byte: supervisor or user mode?
14665 beq.b facc_out2 # user
14666 bset &0x2,0xd(%sp) # set supervisor TM bit (bit 2 of the FSLW byte at 0xd)
14667
14668facc_out2:
14669 bra.l _real_access
14670
14671##################################################################
14672
14673# if the effective addressing mode was predecrement or postincrement,
14674# the emulation has already changed its value to the correct post-
14675# instruction value. but since we're exiting to the access error
14676# handler, then AN must be returned to its pre-instruction value.
14677# we do that here.
# restore: undo an (An)+ or -(An) update before exiting via the access error
# handler.
#   In:  d0 = operand size in bytes (set by the facc_* caller)
#   Clobbers d1; d0 is negated on the predecrement path.
# Any other addressing mode returns immediately with nothing changed.
14678restore:
14679 mov.b EXC_OPWORD+0x1(%a6),%d1
14680 andi.b &0x38,%d1 # isolate bits 5:3 of the opword low byte (EA mode field)
14681 cmpi.b %d1,&0x18 # postinc?
14682 beq.w rest_inc
14683 cmpi.b %d1,&0x20 # predec?
14684 beq.w rest_dec
14685 rts
14686
# rest_inc: subtract d0 from the address register named in the low three bits
# of the opword, by jumping through tbl_rest_inc to the matching ri_aN handler.
# The handler's rts returns to restore()'s caller.
14687rest_inc:
14688 mov.b EXC_OPWORD+0x1(%a6),%d1
14689 andi.w &0x0007,%d1 # fetch An; the word-wide mask also clears the stale upper byte
14690
14691 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1 # load 16-bit handler offset for An
14692 jmp (tbl_rest_inc.b,%pc,%d1.w*1) # offset is relative to tbl_rest_inc
14693
# One 16-bit entry per address register, each the distance from the table
# base to its handler.
14694tbl_rest_inc:
14695 short ri_a0 - tbl_rest_inc
14696 short ri_a1 - tbl_rest_inc
14697 short ri_a2 - tbl_rest_inc
14698 short ri_a3 - tbl_rest_inc
14699 short ri_a4 - tbl_rest_inc
14700 short ri_a5 - tbl_rest_inc
14701 short ri_a6 - tbl_rest_inc
14702 short ri_a7 - tbl_rest_inc
14703
# ri_a0..ri_a6: tbl_rest_inc targets; each subtracts d0 from one address
# register and returns.
# a0/a1 are patched in the EXC_DREGS save area because facc_finish reloads
# d0-d1/a0-a1 from there; a2-a5 are patched in the live registers; a6 is
# patched in the slot at (%a6), which the unlk in facc_finish pops into a6.
14704ri_a0:
14705 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
14706 rts
14707ri_a1:
14708 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
14709 rts
14710ri_a2:
14711 sub.l %d0,%a2 # fix a2
14712 rts
14713ri_a3:
14714 sub.l %d0,%a3 # fix a3
14715 rts
14716ri_a4:
14717 sub.l %d0,%a4 # fix a4
14718 rts
14719ri_a5:
14720 sub.l %d0,%a5 # fix a5
14721 rts
14722ri_a6:
14723 sub.l %d0,(%a6) # fix stacked a6
14724 rts
14725# if it's a fmove out instruction, we don't have to fix a7
14726# because we hadn't changed it yet. if it's an opclass two
14727# instruction (data moved in) and the exception was in supervisor
14728# mode, then a7 also wasn't updated. if it was user mode, then
14729# restore the correct a7 which is in the USP currently.
# Reads the original EXC_VOFF, so it must run before the facc_* stub
# overwrites that word with the FSLW value. Clobbers a0 on the user-mode path.
14730ri_a7:
14731 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out? (first byte of the saved voff word)
14732 bne.b ri_a7_done # out
14733
14734 btst &0x5,EXC_SR(%a6) # user or supervisor?
14735 bne.b ri_a7_done # supervisor
14736 movc %usp,%a0 # fetch USP
14737 sub.l %d0,%a0 # back out the adjustment
14738 movc %a0,%usp # write corrected USP
14739ri_a7_done:
14740 rts
14741
14742# need to invert adjustment value if the <ea> was predec
# rest_inc subtracts d0, so negating it here turns that into an addition,
# which undoes the predecrement.
14743rest_dec:
14744 neg.l %d0
14745 bra.b rest_inc