/[pearpc]/src/cpu/cpu_generic/ppc_fpu.cc
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/cpu/cpu_generic/ppc_fpu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 34024 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * ppc_fpu.cc
4     *
5     * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6     * Copyright (C) 2003 Stefan Weyergraf
7     *
8     * This program is free software; you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License version 2 as
10     * published by the Free Software Foundation.
11     *
12     * This program is distributed in the hope that it will be useful,
13     * but WITHOUT ANY WARRANTY; without even the implied warranty of
14     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     * GNU General Public License for more details.
16     *
17     * You should have received a copy of the GNU General Public License
18     * along with this program; if not, write to the Free Software
19     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20     */
21    
22     #include "debug/tracers.h"
23     #include "ppc_cpu.h"
24     #include "ppc_dec.h"
25     #include "ppc_fpu.h"
26    
27     // .121
28    
29    
30     #define PPC_FPR_TYPE2(a,b) (((a)<<8)|(b))
31     inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
32     {
33     switch (PPC_FPR_TYPE2(a.type, b.type)) {
34     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
35     int diff = a.e - b.e;
36     if (diff<0) {
37     diff = -diff;
38     if (diff <= 56) {
39     a.m >>= diff;
40     } else if (a.m != 0) {
41     a.m = 1;
42     } else {
43     a.m = 0;
44     }
45     res.e = b.e;
46     } else {
47     if (diff <= 56) {
48     b.m >>= diff;
49     } else if (b.m != 0) {
50     b.m = 1;
51     } else {
52     b.m = 0;
53     }
54     res.e = a.e;
55     }
56     res.type = ppc_fpr_norm;
57     if (a.s == b.s) {
58     res.s = a.s;
59     res.m = a.m + b.m;
60     if (res.m & (1ULL<<56)) {
61     res.m >>= 1;
62     res.e++;
63     }
64     } else {
65     res.s = a.s;
66     res.m = a.m - b.m;
67     if (!res.m) {
68     if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
69     res.s |= b.s;
70     } else {
71     res.s &= b.s;
72     }
73     res.type = ppc_fpr_zero;
74     } else {
75     if ((sint64)res.m < 0) {
76     res.m = b.m - a.m;
77     res.s = b.s;
78     }
79     diff = ppc_fpu_normalize(res) - 8;
80     res.e -= diff;
81     res.m <<= diff;
82     }
83     }
84     break;
85     }
86     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
87     res.s = a.s;
88     res.type = ppc_fpr_NaN;
89     break;
90     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
91     res.e = a.e;
92     // fall-thru
93     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
94     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
95     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
96     res.s = a.s;
97     res.m = a.m;
98     res.type = a.type;
99     break;
100     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
101     res.e = b.e;
102     // fall-thru
103     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
104     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
105     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
106     res.s = b.s;
107     res.m = b.m;
108     res.type = b.type;
109     break;
110     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
111     if (a.s != b.s) {
112     // +oo + -oo == NaN
113     res.s = a.s ^ b.s;
114     res.type = ppc_fpr_NaN;
115     break;
116     }
117     // fall-thru
118     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
119     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
120     res.s = a.s;
121     res.type = a.type;
122     break;
123     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
124     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
125     res.s = b.s;
126     res.type = b.type;
127     break;
128     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
129     // round bla
130     res.type = ppc_fpr_zero;
131     res.s = a.s && b.s;
132     break;
133     }
134     }
135    
136     inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
137     {
138     if (exp >= 64) {
139     q.m1 = q.m0;
140     q.m0 = 0;
141     exp -= 64;
142     }
143     uint64 t = q.m0 & ((1ULL<<exp)-1);
144     q.m0 >>= exp;
145     q.m1 >>= exp;
146     q.m1 |= t<<(64-exp);
147     }
148    
149     inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
150     {
151     if (exp >= 64) {
152     q.m0 = q.m1;
153     q.m1 = 0;
154     exp -= 64;
155     }
156     uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
157     q.m0 <<= exp;
158     q.m1 <<= exp;
159     q.m0 |= t;
160     }
161    
162     inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
163     {
164     res.m1 = a.m1+b.m1;
165     if (res.m1 < a.m1) {
166     res.m0 = a.m0+b.m0+1;
167     } else {
168     res.m0 = a.m0+b.m0;
169     }
170     }
171    
172     inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
173     {
174     res.m1 = a.m1-b.m1;
175     if (a.m1 < b.m1) {
176     res.m0 = a.m0-b.m0-1;
177     } else {
178     res.m0 = a.m0-b.m0;
179     }
180     }
181    
182     // res has 107 significant bits. a, b have 106 significant bits each.
183     inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b)
184     {
185     // treat as 107 bit mantissa
186     if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1);
187     if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1);
188     switch (PPC_FPR_TYPE2(a.type, b.type)) {
189     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
190     int diff = a.e - b.e;
191     if (diff < 0) {
192     diff = -diff;
193     if (diff <= 107) {
194     // FIXME: may set x_prime
195     ppc_fpu_quadro_mshr(a, diff);
196     } else if (a.m0 || a.m1) {
197     a.m0 = 0;
198     a.m1 = 1;
199     } else {
200     a.m0 = 0;
201     a.m1 = 0;
202     }
203     res.e = b.e;
204     } else {
205     if (diff <= 107) {
206     // FIXME: may set x_prime
207     ppc_fpu_quadro_mshr(b, diff);
208     } else if (b.m0 || b.m1) {
209     b.m0 = 0;
210     b.m1 = 1;
211     } else {
212     b.m0 = 0;
213     b.m1 = 0;
214     }
215     res.e = a.e;
216     }
217     res.type = ppc_fpr_norm;
218     if (a.s == b.s) {
219     res.s = a.s;
220     ppc_fpu_add_quadro_m(res, a, b);
221     int X_prime = res.m1 & 1;
222     if (res.m0 & (1ULL<<(107-64))) {
223     ppc_fpu_quadro_mshr(res, 1);
224     res.e++;
225     }
226     // res = [107]
227     res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime;
228     } else {
229     res.s = a.s;
230     int cmp;
231     if (a.m0 < b.m0) {
232     cmp = -1;
233     } else if (a.m0 > b.m0) {
234     cmp = +1;
235     } else {
236     if (a.m1 < b.m1) {
237     cmp = -1;
238     } else if (a.m1 > b.m1) {
239     cmp = +1;
240     } else {
241     cmp = 0;
242     }
243     }
244     if (!cmp) {
245     if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
246     res.s |= b.s;
247     } else {
248     res.s &= b.s;
249     }
250     res.type = ppc_fpr_zero;
251     } else {
252     if (cmp < 0) {
253     ppc_fpu_sub_quadro_m(res, b, a);
254     res.s = b.s;
255     } else {
256     ppc_fpu_sub_quadro_m(res, a, b);
257     }
258     diff = ppc_fpu_normalize_quadro(res) - (128-107);
259     int X_prime = res.m1 & 1;
260     res.m1 &= 0xfffffffffffffffeULL;
261     ppc_fpu_quadro_mshl(res, diff);
262     res.e -= diff;
263     res.m1 |= X_prime;
264     }
265     // res = [107]
266     }
267     break;
268     }
269     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
270     res.s = a.s;
271     res.type = ppc_fpr_NaN;
272     break;
273     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
274     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
275     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
276     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
277     res.e = a.e;
278     res.s = a.s;
279     res.m0 = a.m0;
280     res.m1 = a.m1;
281     res.type = a.type;
282     break;
283     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
284     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
285     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
286     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
287     res.e = b.e;
288     res.s = b.s;
289     res.m0 = b.m0;
290     res.m1 = b.m1;
291     res.type = b.type;
292     break;
293     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
294     if (a.s != b.s) {
295     // +oo + -oo == NaN
296     res.s = a.s ^ b.s;
297     res.type = ppc_fpr_NaN;
298     break;
299     }
300     // fall-thru
301     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
302     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
303     res.s = a.s;
304     res.type = a.type;
305     break;
306     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
307     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
308     res.s = b.s;
309     res.type = b.type;
310     break;
311     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
312     // round bla
313     res.type = ppc_fpr_zero;
314     res.s = a.s && b.s;
315     break;
316     }
317     }
318    
319     inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
320     {
321     carry = (a+b < a) ? 1 : 0;
322     a += b;
323     }
324    
325     // 'res' has 56 significant bits on return, a + b have 56 significant bits each
326     inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b)
327     {
328     res.s = a.s ^ b.s;
329     switch (PPC_FPR_TYPE2(a.type, b.type)) {
330     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
331     res.type = ppc_fpr_norm;
332     res.e = a.e + b.e;
333     // printf("new exp: %d\n", res.e);
334     // ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m);
335     uint64 fH, fM1, fM2, fL;
336     fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
337     fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
338     fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
339     fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
340     // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
341    
342     // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
343     uint64 rL, rH;
344     rL = fL; // rL = rH = [63,64]
345     rH = fH; // rH = fH = [47,48]
346     uint64 split;
347     split = fM1 + fM2;
348     uint64 carry;
349     ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
350     rH += carry; // rH = [0 .. 2^48]
351     rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
352    
353     // res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55]
354     // [---------------------------------------------------------]
355     // bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ]
356     // [---------------------------------------------------------]
357     // [15 bits zero | 49 bits rH | 8 most sign.bits rL ]
358     res.m = rH << 9;
359     res.m |= rL >> (64-9);
360     // res.m = [58]
361    
362     // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
363     if (res.m & (1ULL << 57)) {
364     res.m >>= 2;
365     res.e += 2;
366     } else if (res.m & (1ULL << 56)) {
367     res.m >>= 1;
368     res.e++;
369     }
370     // res.m = [56]
371     break;
372     }
373     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
374     res.type = a.type;
375     res.e = a.e;
376     break;
377     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
378     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
379     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
380     res.s = a.s;
381     // fall-thru
382     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
383     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
384     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
385     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
386     res.type = a.type;
387     break;
388     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
389     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
390     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
391     res.s = b.s;
392     // fall-thru
393     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
394     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
395     res.type = b.type;
396     break;
397     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
398     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
399     res.type = ppc_fpr_NaN;
400     break;
401     }
402     }
403    
404     // 'res' has 'prec' significant bits on return, a + b have 56 significant bits each
405     // for 111 >= prec >= 64
406     inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec)
407     {
408     res.s = a.s ^ b.s;
409     switch (PPC_FPR_TYPE2(a.type, b.type)) {
410     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
411     res.type = ppc_fpr_norm;
412     res.e = a.e + b.e;
413     // printf("new exp: %d\n", res.e);
414     // ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m);
415     uint64 fH, fM1, fM2, fL;
416     fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
417     fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
418     fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
419     fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
420     // ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL);
421    
422     // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
423     uint64 rL, rH;
424     rL = fL; // rL = rH = [63,64]
425     rH = fH; // rH = fH = [47,48]
426     uint64 split;
427     split = fM1 + fM2;
428     uint64 carry;
429     ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
430     rH += carry; // rH = [0 .. 2^48]
431     rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
432    
433     // res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0]
434     // [-----------------------------------------------------------]
435     // log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ]
436     // [-----------------------------------------------------------]
437     // [ 15 bits zero | 49 bits rH | 64 bits rL ]
438     res.m0 = rH;
439     res.m1 = rL;
440     // res.m0|res.m1 = [111,112,113]
441    
442     // ht_printf("res = %016qx%016qx\n", res.m0, res.m1);
443     if (res.m0 & (1ULL << 48)) {
444     ppc_fpu_quadro_mshr(res, 2+(111-prec));
445     res.e += 2;
446     } else if (res.m0 & (1ULL << 47)) {
447     ppc_fpu_quadro_mshr(res, 1+(111-prec));
448     res.e += 1;
449     } else {
450     ppc_fpu_quadro_mshr(res, 111-prec);
451     }
452     // res.m0|res.m1 = [prec]
453     break;
454     }
455     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
456     res.type = a.type;
457     res.e = a.e;
458     break;
459     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
460     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
461     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
462     res.s = a.s;
463     // fall-thru
464     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
465     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
466     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
467     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
468     res.type = a.type;
469     break;
470     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
471     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
472     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
473     res.s = b.s;
474     // fall-thru
475     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
476     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
477     res.type = b.type;
478     break;
479     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
480     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
481     res.type = ppc_fpr_NaN;
482     break;
483     }
484     }
485    
486     // calculate one of these:
487     // + m1 * m2 + s
488     // + m1 * m2 - s
489     // - m1 * m2 + s
490     // - m1 * m2 - s
491     // using a 106 bit accumulator
492     //
493     // .752
494     //
495     // FIXME: There is a bug in this code that shows up in Mac OS X Finder fwd/bwd
496     // button: the top line is not rendered correctly. This works with the jitc_x86
497     // FPU however...
498     inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2,
499     ppc_double &s)
500     {
501     ppc_quadro p;
502     /* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e,
503     ppc_fpu_get_fpr_type(m1.type));
504     ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e,
505     ppc_fpu_get_fpr_type(m2.type));*/
506     // create product with 106 significant bits
507     ppc_fpu_mul_quadro(p, m1, m2, 106);
508     /* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e,
509     ppc_fpu_get_fpr_type(p.type));*/
510     // convert s into ppc_quadro
511     /* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e,
512     ppc_fpu_get_fpr_type(s.type));*/
513     ppc_quadro q;
514     q.e = s.e;
515     q.s = s.s;
516     q.type = s.type;
517     q.m0 = 0;
518     q.m1 = s.m;
519     // .. with 106 significant bits
520     ppc_fpu_quadro_mshl(q, 106-56);
521     /* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e,
522     ppc_fpu_get_fpr_type(q.type));*/
523     // now we must add p, q.
524     ppc_quadro x;
525     ppc_fpu_add_quadro(x, p, q);
526     // x = [107]
527     /* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e,
528     ppc_fpu_get_fpr_type(x.type));*/
529     res.type = x.type;
530     res.s = x.s;
531     res.e = x.e;
532     if (x.type == ppc_fpr_norm) {
533     res.m = x.m0 << 13; // 43 bits from m0
534     res.m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1
535     res.m |= x.m1 & 1; // X' bit from m1
536     }
537     /* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e,
538     ppc_fpu_get_fpr_type(res.type));*/
539     }
540    
541     inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
542     {
543     res.s = a.s ^ b.s;
544     switch (PPC_FPR_TYPE2(a.type, b.type)) {
545     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
546     res.type = ppc_fpr_norm;
547     res.e = a.e - b.e;
548     res.m = 0;
549     uint64 am = a.m, bm = b.m;
550     uint i = 0;
551     while (am && (i<56)) {
552     res.m <<= 1;
553     if (am >= bm) {
554     res.m |= 1;
555     am -= bm;
556     }
557     am <<= 1;
558     // printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
559     i++;
560     }
561     res.m <<= 57-i;
562     if (res.m & (1ULL << 56)) {
563     res.m >>= 1;
564     } else {
565     res.e--;
566     }
567     // printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
568     break;
569     }
570     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
571     res.e = a.e;
572     // fall-thru
573     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
574     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
575     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
576     res.s = a.s;
577     // fall-thru
578     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
579     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
580     res.type = a.type;
581     break;
582     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
583     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
584     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
585     res.s = b.s;
586     res.type = b.type;
587     break;
588     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
589     res.type = ppc_fpr_zero;
590     break;
591     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
592     res.type = ppc_fpr_Inf;
593     break;
594     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
595     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
596     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
597     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
598     res.type = ppc_fpr_NaN;
599     break;
600     }
601     }
602    
603     inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
604     {
605     switch (B.type) {
606     case ppc_fpr_norm:
607     if (B.s) {
608     D.type = ppc_fpr_NaN;
609     gCPU.fpscr |= FPSCR_VXSQRT;
610     break;
611     }
612     // D := 1/2(D_old + B/D_old)
613     D = B;
614     D.e /= 2;
615     for (int i=0; i<6; i++) {
616     ppc_double D_old = D;
617     ppc_double B_div_D_old;
618     ppc_fpu_div(B_div_D_old, B, D_old);
619     ppc_fpu_add(D, D_old, B_div_D_old);
620     D.e--;
621    
622     /* uint64 e;
623     ppc_double E = D;
624     ppc_fpu_pack_double(E, e);
625     printf("%.20f\n", *(double *)&e);*/
626     }
627     break;
628     case ppc_fpr_zero:
629     D.type = ppc_fpr_zero;
630     D.s = B.s;
631     break;
632     case ppc_fpr_Inf:
633     if (B.s) {
634     D.type = ppc_fpr_NaN;
635     gCPU.fpscr |= FPSCR_VXSQRT;
636     } else {
637     D.type = ppc_fpr_Inf;
638     D.s = 0;
639     }
640     break;
641     case ppc_fpr_NaN:
642     D.type = ppc_fpr_NaN;
643     break;
644     }
645     }
646    
647     void ppc_fpu_test()
648     {
649     ppc_double A, B, C;
650     double a, b, c;
651     A.type = B.type = ppc_fpr_norm;
652     A.s = 1;
653     A.e = 0;
654     A.m = 0;
655     A.m = ((1ULL<<56)-1)-((1ULL<<10)-1);
656     ht_printf("%qb\n", A.m);
657     B.s = 1;
658     B.e = 0;
659     B.m = 0;
660     B.m = ((1ULL<<56)-1)-((1ULL<<50)-1);
661     a = ppc_fpu_get_double(A);
662     b = ppc_fpu_get_double(B);
663     printf("%f + %f = \n", a, b);
664     ppc_fpu_add(C, A, B);
665     uint64 d;
666     uint32 s;
667     ppc_fpu_pack_double_as_single(C, d);
668     ht_printf("%064qb\n", d);
669     ppc_fpu_unpack_double(C, d);
670     ppc_fpu_pack_single(C, s);
671     ht_printf("single: %032b\n", s);
672     ppc_single Cs;
673     ppc_fpu_unpack_single(Cs, s);
674     ppc_fpu_single_to_double(Cs, C);
675     // ht_printf("%d\n", ppc_fpu_double_to_int(C));
676     c = ppc_fpu_get_double(C);
677     printf("%f\n", c);
678     }
679    
680     /*
681     * a and b must not be NaNs
682     */
683     inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b)
684     {
685     if (a.type == ppc_fpr_zero) {
686     if (b.type == ppc_fpr_zero) return 2;
687     return (b.s) ? 4: 8;
688     }
689     if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4;
690     if (a.s != b.s) return (a.s) ? 8: 4;
691     if (a.e > b.e) return (a.s) ? 8: 4;
692     if (a.e < b.e) return (a.s) ? 4: 8;
693     if (a.m > b.m) return (a.s) ? 8: 4;
694     if (a.m < b.m) return (a.s) ? 4: 8;
695     return 2;
696     }
697    
698     double ppc_fpu_get_double(uint64 d)
699     {
700     ppc_double dd;
701     ppc_fpu_unpack_double(dd, d);
702     return ppc_fpu_get_double(dd);
703     }
704    
705     double ppc_fpu_get_double(ppc_double &d)
706     {
707     if (d.type == ppc_fpr_norm) {
708     double r = d.m;
709     for (int i=0; i<55; i++) {
710     r = r / 2.0;
711     }
712     if (d.e < 0) {
713     for (int i=0; i>d.e; i--) {
714     r = r / 2.0;
715     }
716     } else if (d.e > 0) {
717     for (int i=0; i<d.e; i++) {
718     r = r * 2.0;
719     }
720     }
721     if (d.s) r = -r;
722     return r;
723     } else {
724     return 0.0;
725     }
726     }
727    
728     /***********************************************************************************
729     *
730     */
731    
732    
733     /*
734     * fabsx Floating Absolute Value
735     * .484
736     */
737     void ppc_opc_fabsx()
738     {
739     int frD, frA, frB;
740     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
741     PPC_OPC_ASSERT(frA==0);
742     gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT;
743     if (gCPU.current_opc & PPC_OPC_Rc) {
744     // update cr1 flags
745     PPC_FPU_ERR("fabs.\n");
746     }
747     }
748     /*
749     * faddx Floating Add (Double-Precision)
750     * .485
751     */
752     void ppc_opc_faddx()
753     {
754     int frD, frA, frB, frC;
755     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
756     PPC_OPC_ASSERT(frC==0);
757     ppc_double A, B, D;
758     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
759     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
760     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
761     gCPU.fpscr |= FPSCR_VXISI;
762     }
763     ppc_fpu_add(D, A, B);
764     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
765     if (gCPU.current_opc & PPC_OPC_Rc) {
766     // update cr1 flags
767     PPC_FPU_ERR("fadd.\n");
768     }
769     }
770     /*
771     * faddx Floating Add Single
772     * .486
773     */
774     void ppc_opc_faddsx()
775     {
776     int frD, frA, frB, frC;
777     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
778     PPC_OPC_ASSERT(frC==0);
779     ppc_double A, B, D;
780     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
781     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
782     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
783     gCPU.fpscr |= FPSCR_VXISI;
784     }
785     ppc_fpu_add(D, A, B);
786     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
787     if (gCPU.current_opc & PPC_OPC_Rc) {
788     // update cr1 flags
789     PPC_FPU_ERR("fadds.\n");
790     }
791     }
792     /*
793     * fcmpo Floating Compare Ordered
794     * .488
795     */
796     static uint32 ppc_fpu_cmp_and_mask[8] = {
797     0xfffffff0,
798     0xffffff0f,
799     0xfffff0ff,
800     0xffff0fff,
801     0xfff0ffff,
802     0xff0fffff,
803     0xf0ffffff,
804     0x0fffffff,
805     };
806     void ppc_opc_fcmpo()
807     {
808     int crfD, frA, frB;
809     PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
810     crfD >>= 2;
811     ppc_double A, B;
812     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
813     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
814     uint32 cmp;
815     if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
816     gCPU.fpscr |= FPSCR_VXSNAN;
817     /*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC;
818     cmp = 1;
819     } else {
820     cmp = ppc_fpu_compare(A, B);
821     }
822     crfD = 7-crfD;
823     gCPU.fpscr &= ~0x1f000;
824     gCPU.fpscr |= (cmp << 12);
825     gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
826     gCPU.cr |= (cmp << (crfD * 4));
827     }
828     /*
829     * fcmpu Floating Compare Unordered
830     * .489
831     */
832     void ppc_opc_fcmpu()
833     {
834     int crfD, frA, frB;
835     PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
836     crfD >>= 2;
837     ppc_double A, B;
838     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
839     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
840     uint32 cmp;
841     if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
842     gCPU.fpscr |= FPSCR_VXSNAN;
843     cmp = 1;
844     } else {
845     cmp = ppc_fpu_compare(A, B);
846     }
847     crfD = 7-crfD;
848     gCPU.fpscr &= ~0x1f000;
849     gCPU.fpscr |= (cmp << 12);
850     gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
851     gCPU.cr |= (cmp << (crfD * 4));
852     }
853     /*
854     * fctiwx Floating Convert to Integer Word
855     * .492
856     */
857     void ppc_opc_fctiwx()
858     {
859     int frD, frA, frB;
860     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
861     PPC_OPC_ASSERT(frA==0);
862     ppc_double B;
863     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
864     gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
865     if (gCPU.current_opc & PPC_OPC_Rc) {
866     // update cr1 flags
867     PPC_FPU_ERR("fctiw.\n");
868     }
869     }
870     /*
871     * fctiwzx Floating Convert to Integer Word with Round toward Zero
872     * .493
873     */
874     void ppc_opc_fctiwzx()
875     {
876     int frD, frA, frB;
877     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
878     PPC_OPC_ASSERT(frA==0);
879     uint32 oldfpscr = gCPU.fpscr;
880     gCPU.fpscr &= ~3;
881     gCPU.fpscr |= 1;
882     ppc_double B;
883     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
884     gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
885     gCPU.fpscr = oldfpscr;
886     if (gCPU.current_opc & PPC_OPC_Rc) {
887     // update cr1 flags
888     PPC_FPU_ERR("fctiwz.\n");
889     }
890     }
891     /*
892     * fdivx Floating Divide (Double-Precision)
893     * .494
894     */
895     void ppc_opc_fdivx()
896     {
897     int frD, frA, frB, frC;
898     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
899     PPC_OPC_ASSERT(frC==0);
900     ppc_double A, B, D;
901     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
902     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
903     if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
904     gCPU.fpscr |= FPSCR_VXZDZ;
905     }
906     if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
907     gCPU.fpscr |= FPSCR_VXIDI;
908     }
909     if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
910     // FIXME::
911     gCPU.fpscr |= FPSCR_VXIDI;
912     }
913     ppc_fpu_div(D, A, B);
914     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
915     if (gCPU.current_opc & PPC_OPC_Rc) {
916     // update cr1 flags
917     PPC_FPU_ERR("fdiv.\n");
918     }
919     }
920     /*
921     * fdivsx Floating Divide Single
922     * .495
923     */
924     void ppc_opc_fdivsx()
925     {
926     int frD, frA, frB, frC;
927     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
928     PPC_OPC_ASSERT(frC==0);
929     ppc_double A, B, D;
930     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
931     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
932     if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
933     gCPU.fpscr |= FPSCR_VXZDZ;
934     }
935     if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
936     gCPU.fpscr |= FPSCR_VXIDI;
937     }
938     if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
939     // FIXME::
940     gCPU.fpscr |= FPSCR_VXIDI;
941     }
942     ppc_fpu_div(D, A, B);
943     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
944     if (gCPU.current_opc & PPC_OPC_Rc) {
945     // update cr1 flags
946     PPC_FPU_ERR("fdivs.\n");
947     }
948     }
949     /*
950     * fmaddx Floating Multiply-Add (Double-Precision)
951     * .496
952     */
953     void ppc_opc_fmaddx()
954     {
955     int frD, frA, frB, frC;
956     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
957     ppc_double A, B, C, D;
958     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
959     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
960     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
961     ppc_fpu_mul_add(D, A, C, B);
962     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
963     if (gCPU.current_opc & PPC_OPC_Rc) {
964     // update cr1 flags
965     PPC_FPU_ERR("fmadd.\n");
966     }
967     }
968     /*
969     * fmaddx Floating Multiply-Add Single
970     * .497
971     */
972     void ppc_opc_fmaddsx()
973     {
974     int frD, frA, frB, frC;
975     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
976     ppc_double A, B, C, D;
977     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
978     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
979     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
980     ppc_fpu_mul_add(D, A, C, B);
981     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
982     if (gCPU.current_opc & PPC_OPC_Rc) {
983     // update cr1 flags
984     PPC_FPU_ERR("fmadds.\n");
985     }
986     }
987     /*
988     * fmrx Floating Move Register
989     * .498
990     */
991     void ppc_opc_fmrx()
992     {
993     int frD, rA, frB;
994     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB);
995     PPC_OPC_ASSERT(rA==0);
996     gCPU.fpr[frD] = gCPU.fpr[frB];
997     if (gCPU.current_opc & PPC_OPC_Rc) {
998     // update cr1 flags
999     PPC_FPU_ERR("fmr.\n");
1000     }
1001     }
1002     /*
1003     * fmsubx Floating Multiply-Subtract (Double-Precision)
1004     * .499
1005     */
1006     void ppc_opc_fmsubx()
1007     {
1008     int frD, frA, frB, frC;
1009     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1010     ppc_double A, B, C, D;
1011     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1012     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1013     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1014     B.s ^= 1;
1015     ppc_fpu_mul_add(D, A, C, B);
1016     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1017     if (gCPU.current_opc & PPC_OPC_Rc) {
1018     // update cr1 flags
1019     PPC_FPU_ERR("fmsub.\n");
1020     }
1021     }
1022     /*
1023     * fmsubsx Floating Multiply-Subtract Single
1024     * .500
1025     */
1026     void ppc_opc_fmsubsx()
1027     {
1028     int frD, frA, frB, frC;
1029     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1030     ppc_double A, B, C, D;
1031     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1032     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1033     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1034     ppc_fpu_mul_add(D, A, C, B);
1035     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1036     if (gCPU.current_opc & PPC_OPC_Rc) {
1037     // update cr1 flags
1038     PPC_FPU_ERR("fmsubs.\n");
1039     }
1040     }
1041     /*
1042     * fmulx Floating Multipy (Double-Precision)
1043     * .501
1044     */
1045     void ppc_opc_fmulx()
1046     {
1047     int frD, frA, frB, frC;
1048     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1049     PPC_OPC_ASSERT(frB==0);
1050     ppc_double A, C, D;
1051     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1052     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1053     if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1054     || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1055     gCPU.fpscr |= FPSCR_VXIMZ;
1056     }
1057     ppc_fpu_mul(D, A, C);
1058     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1059     // *((double*)&gCPU.fpr[frD]) = *((double*)(&gCPU.fpr[frA]))*(*((double*)(&gCPU.fpr[frC])));
1060     if (gCPU.current_opc & PPC_OPC_Rc) {
1061     // update cr1 flags
1062     PPC_FPU_ERR("fmul.\n");
1063     }
1064     }
1065     /*
1066     * fmulsx Floating Multipy Single
1067     * .502
1068     */
1069     void ppc_opc_fmulsx()
1070     {
1071     int frD, frA, frB, frC;
1072     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1073     PPC_OPC_ASSERT(frB==0);
1074     ppc_double A, C, D;
1075     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1076     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1077     if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1078     || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1079     gCPU.fpscr |= FPSCR_VXIMZ;
1080     }
1081     ppc_fpu_mul(D, A, C);
1082     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1083     if (gCPU.current_opc & PPC_OPC_Rc) {
1084     // update cr1 flags
1085     PPC_FPU_ERR("fmuls.\n");
1086     }
1087     }
1088     /*
1089     * fnabsx Floating Negative Absolute Value
1090     * .503
1091     */
1092     void ppc_opc_fnabsx()
1093     {
1094     int frD, frA, frB;
1095     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1096     PPC_OPC_ASSERT(frA==0);
1097     gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT;
1098     if (gCPU.current_opc & PPC_OPC_Rc) {
1099     // update cr1 flags
1100     PPC_FPU_ERR("fnabs.\n");
1101     }
1102     }
1103     /*
1104     * fnegx Floating Negate
1105     * .504
1106     */
1107     void ppc_opc_fnegx()
1108     {
1109     int frD, frA, frB;
1110     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1111     PPC_OPC_ASSERT(frA==0);
1112     gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT;
1113     if (gCPU.current_opc & PPC_OPC_Rc) {
1114     // update cr1 flags
1115     PPC_FPU_ERR("fneg.\n");
1116     }
1117     }
1118     /*
1119     * fnmaddx Floating Negative Multiply-Add (Double-Precision)
1120     * .505
1121     */
1122     void ppc_opc_fnmaddx()
1123     {
1124     int frD, frA, frB, frC;
1125     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1126     ppc_double A, B, C, D/*, E*/;
1127     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1128     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1129     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1130     ppc_fpu_mul_add(D, A, C, B);
1131     D.s ^= 1;
1132     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1133     if (gCPU.current_opc & PPC_OPC_Rc) {
1134     // update cr1 flags
1135     PPC_FPU_ERR("fnmadd.\n");
1136     }
1137     }
1138     /*
1139     * fnmaddsx Floating Negative Multiply-Add Single
1140     * .506
1141     */
1142     void ppc_opc_fnmaddsx()
1143     {
1144     int frD, frA, frB, frC;
1145     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1146     ppc_double A, B, C, D;
1147     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1148     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1149     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1150     ppc_fpu_mul_add(D, A, C, B);
1151     D.s ^= 1;
1152     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1153     if (gCPU.current_opc & PPC_OPC_Rc) {
1154     // update cr1 flags
1155     PPC_FPU_ERR("fnmadds.\n");
1156     }
1157     }
1158     /*
1159     * fnmsubx Floating Negative Multiply-Subtract (Double-Precision)
1160     * .507
1161     */
1162     void ppc_opc_fnmsubx()
1163     {
1164     int frD, frA, frB, frC;
1165     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1166     ppc_double A, B, C, D;
1167     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1168     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1169     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1170     B.s ^= 1;
1171     ppc_fpu_mul_add(D, A, C, B);
1172     D.s ^= 1;
1173     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1174     if (gCPU.current_opc & PPC_OPC_Rc) {
1175     // update cr1 flags
1176     PPC_FPU_ERR("fnmsub.\n");
1177     }
1178     }
1179     /*
1180     * fnsubsx Floating Negative Multiply-Subtract Single
1181     * .508
1182     */
1183     void ppc_opc_fnmsubsx()
1184     {
1185     int frD, frA, frB, frC;
1186     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1187     ppc_double A, B, C, D;
1188     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1189     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1190     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1191     B.s ^= 1;
1192     ppc_fpu_mul_add(D, A, C, B);
1193     D.s ^= 1;
1194     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1195     if (gCPU.current_opc & PPC_OPC_Rc) {
1196     // update cr1 flags
1197     PPC_FPU_ERR("fnmsubs.\n");
1198     }
1199     }
1200     /*
1201     * fresx Floating Reciprocal Estimate Single
1202     * .509
1203     */
1204     void ppc_opc_fresx()
1205     {
1206     int frD, frA, frB, frC;
1207     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1208     PPC_OPC_ASSERT(frA==0 && frC==0);
1209     if (gCPU.current_opc & PPC_OPC_Rc) {
1210     // update cr1 flags
1211     PPC_FPU_ERR("fres.\n");
1212     }
1213     PPC_FPU_ERR("fres\n");
1214     }
1215     /*
1216     * frspx Floating Round to Single
1217     * .511
1218     */
1219     void ppc_opc_frspx()
1220     {
1221     int frD, frA, frB;
1222     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1223     PPC_OPC_ASSERT(frA==0);
1224     ppc_double B;
1225     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1226     gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]);
1227     if (gCPU.current_opc & PPC_OPC_Rc) {
1228     // update cr1 flags
1229     PPC_FPU_ERR("frsp.\n");
1230     }
1231     }
1232     /*
1233     * frsqrtex Floating Reciprocal Square Root Estimate
1234     * .512
1235     */
1236     void ppc_opc_frsqrtex()
1237     {
1238     int frD, frA, frB, frC;
1239     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1240     PPC_OPC_ASSERT(frA==0 && frC==0);
1241     ppc_double B;
1242     ppc_double D;
1243     ppc_double E;
1244     ppc_double Q;
1245     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1246     ppc_fpu_sqrt(Q, B);
1247     E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL;
1248     ppc_fpu_div(D, E, Q);
1249     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1250     if (gCPU.current_opc & PPC_OPC_Rc) {
1251     // update cr1 flags
1252     PPC_FPU_ERR("frsqrte.\n");
1253     }
1254     }
1255     /*
1256     * fselx Floating Select
1257     * .514
1258     */
1259     void ppc_opc_fselx()
1260     {
1261     int frD, frA, frB, frC;
1262     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1263     ppc_double A;
1264     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1265     if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) {
1266     gCPU.fpr[frD] = gCPU.fpr[frB];
1267     } else {
1268     gCPU.fpr[frD] = gCPU.fpr[frC];
1269     }
1270     if (gCPU.current_opc & PPC_OPC_Rc) {
1271     // update cr1 flags
1272     PPC_FPU_ERR("fsel.\n");
1273     }
1274     }
1275     /*
1276     * fsqrtx Floating Square Root (Double-Precision)
1277     * .515
1278     */
1279     void ppc_opc_fsqrtx()
1280     {
1281     int frD, frA, frB, frC;
1282     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1283     PPC_OPC_ASSERT(frA==0 && frC==0);
1284     ppc_double B;
1285     ppc_double D;
1286     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1287     ppc_fpu_sqrt(D, B);
1288     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1289     if (gCPU.current_opc & PPC_OPC_Rc) {
1290     // update cr1 flags
1291     PPC_FPU_ERR("fsqrt.\n");
1292     }
1293     }
1294     /*
1295     * fsqrtsx Floating Square Root Single
1296     * .515
1297     */
1298     void ppc_opc_fsqrtsx()
1299     {
1300     int frD, frA, frB, frC;
1301     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1302     PPC_OPC_ASSERT(frA==0 && frC==0);
1303     if (gCPU.current_opc & PPC_OPC_Rc) {
1304     // update cr1 flags
1305     PPC_FPU_ERR("fsqrts.\n");
1306     }
1307     PPC_FPU_ERR("fsqrts\n");
1308     }
1309     /*
1310     * fsubx Floating Subtract (Double-Precision)
1311     * .517
1312     */
1313     void ppc_opc_fsubx()
1314     {
1315     int frD, frA, frB, frC;
1316     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1317     PPC_OPC_ASSERT(frC==0);
1318     ppc_double A, B, D;
1319     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1320     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1321     if (B.type != ppc_fpr_NaN) {
1322     B.s ^= 1;
1323     }
1324     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1325     gCPU.fpscr |= FPSCR_VXISI;
1326     }
1327     ppc_fpu_add(D, A, B);
1328     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1329     if (gCPU.current_opc & PPC_OPC_Rc) {
1330     // update cr1 flags
1331     PPC_FPU_ERR("fsub.\n");
1332     }
1333     }
1334     /*
1335     * fsubsx Floating Subtract Single
1336     * .518
1337     */
1338     void ppc_opc_fsubsx()
1339     {
1340     int frD, frA, frB, frC;
1341     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1342     PPC_OPC_ASSERT(frC==0);
1343     ppc_double A, B, D;
1344     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1345     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1346     if (B.type != ppc_fpr_NaN) {
1347     B.s ^= 1;
1348     }
1349     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1350     gCPU.fpscr |= FPSCR_VXISI;
1351     }
1352     ppc_fpu_add(D, A, B);
1353     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1354     if (gCPU.current_opc & PPC_OPC_Rc) {
1355     // update cr1 flags
1356     PPC_FPU_ERR("fsubs.\n");
1357     }
1358     }
1359    

  ViewVC Help
Powered by ViewVC 1.1.26