/[pearpc]/src/cpu/cpu_jitc_x86/ppc_fpu.cc
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/cpu/cpu_jitc_x86/ppc_fpu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 51920 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * ppc_fpu.cc
4     *
5     * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6     * Copyright (C) 2003, 2004 Stefan Weyergraf
7     *
8     * This program is free software; you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License version 2 as
10     * published by the Free Software Foundation.
11     *
12     * This program is distributed in the hope that it will be useful,
13     * but WITHOUT ANY WARRANTY; without even the implied warranty of
14     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     * GNU General Public License for more details.
16     *
17     * You should have received a copy of the GNU General Public License
18     * along with this program; if not, write to the Free Software
19     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20     */
21    
#include <cstring>

#include "debug/tracers.h"
#include "ppc_cpu.h"
#include "ppc_dec.h"
#include "ppc_fpu.h"
26    
// .121
// Folds two operand classes into a single integer (first class in bits 8 and
// up, second class in the low byte) so that a switch can dispatch on the pair.
#define PPC_FPR_TYPE2(lhs, rhs) (((lhs) << 8) | (rhs))
29    
30     const char *ppc_fpu_get_fpr_type(ppc_fpr_type t)
31     {
32     switch (t) {
33     case ppc_fpr_norm: return "norm";
34     case ppc_fpr_zero: return "zero";
35     case ppc_fpr_NaN: return "NaN";
36     case ppc_fpr_Inf: return "Inf";
37     default: return "???";
38     }
39     }
40    
41     inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
42     {
43     switch (PPC_FPR_TYPE2(a.type, b.type)) {
44     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
45     int diff = a.e - b.e;
46     if (diff<0) {
47     diff = -diff;
48     if (diff <= 56) {
49     a.m >>= diff;
50     } else if (a.m != 0) {
51     a.m = 1;
52     } else {
53     a.m = 0;
54     }
55     res.e = b.e;
56     } else {
57     if (diff <= 56) {
58     b.m >>= diff;
59     } else if (b.m != 0) {
60     b.m = 1;
61     } else {
62     b.m = 0;
63     }
64     res.e = a.e;
65     }
66     res.type = ppc_fpr_norm;
67     if (a.s == b.s) {
68     res.s = a.s;
69     res.m = a.m + b.m;
70     if (res.m & (1ULL<<56)) {
71     res.m >>= 1;
72     res.e++;
73     }
74     } else {
75     res.s = a.s;
76     res.m = a.m - b.m;
77     if (!res.m) {
78     if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
79     res.s |= b.s;
80     } else {
81     res.s &= b.s;
82     }
83     res.type = ppc_fpr_zero;
84     } else {
85     if ((sint64)res.m < 0) {
86     res.m = b.m - a.m;
87     res.s = b.s;
88     }
89     diff = ppc_fpu_normalize(res) - 8;
90     res.e -= diff;
91     res.m <<= diff;
92     }
93     }
94     break;
95     }
96     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
97     res.s = a.s;
98     res.type = ppc_fpr_NaN;
99     break;
100     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
101     res.e = a.e;
102     // fall-thru
103     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
104     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
105     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
106     res.s = a.s;
107     res.m = a.m;
108     res.type = a.type;
109     break;
110     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
111     res.e = b.e;
112     // fall-thru
113     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
114     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
115     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
116     res.s = b.s;
117     res.m = b.m;
118     res.type = b.type;
119     break;
120     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
121     if (a.s != b.s) {
122     // +oo + -oo == NaN
123     res.s = a.s ^ b.s;
124     res.type = ppc_fpr_NaN;
125     break;
126     }
127     // fall-thru
128     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
129     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
130     res.s = a.s;
131     res.type = a.type;
132     break;
133     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
134     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
135     res.s = b.s;
136     res.type = b.type;
137     break;
138     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
139     // round bla
140     res.type = ppc_fpr_zero;
141     res.s = a.s && b.s;
142     break;
143     }
144     }
145    
146     inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
147     {
148     if (exp >= 64) {
149     q.m1 = q.m0;
150     q.m0 = 0;
151     exp -= 64;
152     }
153     uint64 t = q.m0 & ((1ULL<<exp)-1);
154     q.m0 >>= exp;
155     q.m1 >>= exp;
156     q.m1 |= t<<(64-exp);
157     }
158    
159     inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
160     {
161     if (exp >= 64) {
162     q.m0 = q.m1;
163     q.m1 = 0;
164     exp -= 64;
165     }
166     uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
167     q.m0 <<= exp;
168     q.m1 <<= exp;
169     q.m0 |= t;
170     }
171    
172     inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
173     {
174     res.m1 = a.m1+b.m1;
175     if (res.m1 < a.m1) {
176     res.m0 = a.m0+b.m0+1;
177     } else {
178     res.m0 = a.m0+b.m0;
179     }
180     }
181    
182     inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
183     {
184     res.m1 = a.m1-b.m1;
185     if (a.m1 < b.m1) {
186     res.m0 = a.m0-b.m0-1;
187     } else {
188     res.m0 = a.m0-b.m0;
189     }
190     }
191    
192     // res has 107 significant bits. a, b have 106 significant bits each.
193     inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b)
194     {
195     // treat as 107 bit mantissa
196     if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1);
197     if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1);
198     switch (PPC_FPR_TYPE2(a.type, b.type)) {
199     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
200     int diff = a.e - b.e;
201     if (diff < 0) {
202     diff = -diff;
203     if (diff <= 107) {
204     // FIXME: may set x_prime
205     ppc_fpu_quadro_mshr(a, diff);
206     } else if (a.m0 || a.m1) {
207     a.m0 = 0;
208     a.m1 = 1;
209     } else {
210     a.m0 = 0;
211     a.m1 = 0;
212     }
213     res.e = b.e;
214     } else {
215     if (diff <= 107) {
216     // FIXME: may set x_prime
217     ppc_fpu_quadro_mshr(b, diff);
218     } else if (b.m0 || b.m1) {
219     b.m0 = 0;
220     b.m1 = 1;
221     } else {
222     b.m0 = 0;
223     b.m1 = 0;
224     }
225     res.e = a.e;
226     }
227     res.type = ppc_fpr_norm;
228     if (a.s == b.s) {
229     res.s = a.s;
230     ppc_fpu_add_quadro_m(res, a, b);
231     int X_prime = res.m1 & 1;
232     if (res.m0 & (1ULL<<(107-64))) {
233     ppc_fpu_quadro_mshr(res, 1);
234     res.e++;
235     }
236     // res = [107]
237     res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime;
238     } else {
239     res.s = a.s;
240     int cmp;
241     if (a.m0 < b.m0) {
242     cmp = -1;
243     } else if (a.m0 > b.m0) {
244     cmp = +1;
245     } else {
246     if (a.m1 < b.m1) {
247     cmp = -1;
248     } else if (a.m1 > b.m1) {
249     cmp = +1;
250     } else {
251     cmp = 0;
252     }
253     }
254     if (!cmp) {
255     if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
256     res.s |= b.s;
257     } else {
258     res.s &= b.s;
259     }
260     res.type = ppc_fpr_zero;
261     } else {
262     if (cmp < 0) {
263     ppc_fpu_sub_quadro_m(res, b, a);
264     res.s = b.s;
265     } else {
266     ppc_fpu_sub_quadro_m(res, a, b);
267     }
268     diff = ppc_fpu_normalize_quadro(res) - (128-107);
269     int X_prime = res.m1 & 1;
270     res.m1 &= 0xfffffffffffffffeULL;
271     ppc_fpu_quadro_mshl(res, diff);
272     res.e -= diff;
273     res.m1 |= X_prime;
274     }
275     // res = [107]
276     }
277     break;
278     }
279     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
280     res.s = a.s;
281     res.type = ppc_fpr_NaN;
282     break;
283     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
284     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
285     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
286     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
287     res.e = a.e;
288     res.s = a.s;
289     res.m0 = a.m0;
290     res.m1 = a.m1;
291     res.type = a.type;
292     break;
293     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
294     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
295     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
296     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
297     res.e = b.e;
298     res.s = b.s;
299     res.m0 = b.m0;
300     res.m1 = b.m1;
301     res.type = b.type;
302     break;
303     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
304     if (a.s != b.s) {
305     // +oo + -oo == NaN
306     res.s = a.s ^ b.s;
307     res.type = ppc_fpr_NaN;
308     break;
309     }
310     // fall-thru
311     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
312     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
313     res.s = a.s;
314     res.type = a.type;
315     break;
316     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
317     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
318     res.s = b.s;
319     res.type = b.type;
320     break;
321     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
322     // round bla
323     res.type = ppc_fpr_zero;
324     res.s = a.s && b.s;
325     break;
326     }
327     }
328    
329     inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
330     {
331     carry = (a+b < a) ? 1 : 0;
332     a += b;
333     }
334    
335     // 'res' has 56 significant bits on return, a + b have 56 significant bits each
336     inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b)
337     {
338     res.s = a.s ^ b.s;
339     switch (PPC_FPR_TYPE2(a.type, b.type)) {
340     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
341     res.type = ppc_fpr_norm;
342     res.e = a.e + b.e;
343     // printf("new exp: %d\n", res.e);
344     // ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m);
345     uint64 fH, fM1, fM2, fL;
346     fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
347     fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
348     fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
349     fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
350     // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
351    
352     // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
353     uint64 rL, rH;
354     rL = fL; // rL = rH = [63,64]
355     rH = fH; // rH = fH = [47,48]
356     uint64 split;
357     split = fM1 + fM2;
358     uint64 carry;
359     ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
360     rH += carry; // rH = [0 .. 2^48]
361     rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
362    
363     // res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55]
364     // [---------------------------------------------------------]
365     // bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ]
366     // [---------------------------------------------------------]
367     // [15 bits zero | 49 bits rH | 8 most sign.bits rL ]
368     res.m = rH << 9;
369     res.m |= rL >> (64-9);
370     // res.m = [58]
371    
372     // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
373     if (res.m & (1ULL << 57)) {
374     res.m >>= 2;
375     res.e += 2;
376     } else if (res.m & (1ULL << 56)) {
377     res.m >>= 1;
378     res.e++;
379     }
380     // res.m = [56]
381     break;
382     }
383     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
384     res.type = a.type;
385     res.e = a.e;
386     break;
387     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
388     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
389     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
390     res.s = a.s;
391     // fall-thru
392     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
393     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
394     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
395     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
396     res.type = a.type;
397     break;
398     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
399     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
400     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
401     res.s = b.s;
402     // fall-thru
403     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
404     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
405     res.type = b.type;
406     break;
407     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
408     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
409     res.type = ppc_fpr_NaN;
410     break;
411     }
412     }
413    
414     // 'res' has 'prec' significant bits on return, a + b have 56 significant bits each
415     // for 111 >= prec >= 64
416     inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec)
417     {
418     res.s = a.s ^ b.s;
419     switch (PPC_FPR_TYPE2(a.type, b.type)) {
420     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
421     res.type = ppc_fpr_norm;
422     res.e = a.e + b.e;
423     // printf("new exp: %d\n", res.e);
424     // ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m);
425     uint64 fH, fM1, fM2, fL;
426     fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
427     fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
428     fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
429     fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
430     // ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL);
431    
432     // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
433     uint64 rL, rH;
434     rL = fL; // rL = rH = [63,64]
435     rH = fH; // rH = fH = [47,48]
436     uint64 split;
437     split = fM1 + fM2;
438     uint64 carry;
439     ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
440     rH += carry; // rH = [0 .. 2^48]
441     rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
442    
443     // res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0]
444     // [-----------------------------------------------------------]
445     // log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ]
446     // [-----------------------------------------------------------]
447     // [ 15 bits zero | 49 bits rH | 64 bits rL ]
448     res.m0 = rH;
449     res.m1 = rL;
450     // res.m0|res.m1 = [111,112,113]
451    
452     // ht_printf("res = %016qx%016qx\n", res.m0, res.m1);
453     if (res.m0 & (1ULL << 48)) {
454     ppc_fpu_quadro_mshr(res, 2+(111-prec));
455     res.e += 2;
456     } else if (res.m0 & (1ULL << 47)) {
457     ppc_fpu_quadro_mshr(res, 1+(111-prec));
458     res.e += 1;
459     } else {
460     ppc_fpu_quadro_mshr(res, 111-prec);
461     }
462     // res.m0|res.m1 = [prec]
463     break;
464     }
465     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
466     res.type = a.type;
467     res.e = a.e;
468     break;
469     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
470     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
471     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
472     res.s = a.s;
473     // fall-thru
474     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
475     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
476     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
477     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
478     res.type = a.type;
479     break;
480     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
481     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
482     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
483     res.s = b.s;
484     // fall-thru
485     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
486     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
487     res.type = b.type;
488     break;
489     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
490     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
491     res.type = ppc_fpr_NaN;
492     break;
493     }
494     }
495    
496     // calculate one of these:
497     // + m1 * m2 + s
498     // + m1 * m2 - s
499     // - m1 * m2 + s
500     // - m1 * m2 - s
501     // using a 106 bit accumulator
502     //
503     // .752
504     //
505     inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2,
506     ppc_double &s)
507     {
508     ppc_quadro p;
509     /* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e,
510     ppc_fpu_get_fpr_type(m1.type));
511     ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e,
512     ppc_fpu_get_fpr_type(m2.type));*/
513     // create product with 106 significant bits
514     ppc_fpu_mul_quadro(p, m1, m2, 106);
515     /* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e,
516     ppc_fpu_get_fpr_type(p.type));*/
517     // convert s into ppc_quadro
518     /* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e,
519     ppc_fpu_get_fpr_type(s.type));*/
520     ppc_quadro q;
521     q.e = s.e;
522     q.s = s.s;
523     q.type = s.type;
524     q.m0 = 0;
525     q.m1 = s.m;
526     // .. with 106 significant bits
527     ppc_fpu_quadro_mshl(q, 106-56);
528     /* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e,
529     ppc_fpu_get_fpr_type(q.type));*/
530     // now we must add p, q.
531     ppc_quadro x;
532     ppc_fpu_add_quadro(x, p, q);
533     // x = [107]
534     /* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e,
535     ppc_fpu_get_fpr_type(x.type));*/
536     res.type = x.type;
537     res.s = x.s;
538     res.e = x.e;
539     if (x.type == ppc_fpr_norm) {
540     res.m = x.m0 << 13; // 43 bits from m0
541     res.m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1
542     res.m |= x.m1 & 1; // X' bit from m1
543     }
544     /* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e,
545     ppc_fpu_get_fpr_type(res.type));*/
546     }
547    
548     inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
549     {
550     res.s = a.s ^ b.s;
551     switch (PPC_FPR_TYPE2(a.type, b.type)) {
552     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
553     res.type = ppc_fpr_norm;
554     res.e = a.e - b.e;
555     res.m = 0;
556     uint64 am = a.m, bm = b.m;
557     uint i = 0;
558     // printf("DIV:\nam=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
559     while (am && (i<56)) {
560     res.m <<= 1;
561     if (am >= bm) {
562     res.m |= 1;
563     am -= bm;
564     }
565     am <<= 1;
566     // printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
567     i++;
568     }
569     res.m <<= 57-i;
570     if (res.m & (1ULL << 56)) {
571     res.m >>= 1;
572     } else {
573     res.e--;
574     }
575     // printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
576     break;
577     }
578     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
579     res.e = a.e;
580     // fall-thru
581     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
582     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
583     case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
584     res.s = a.s;
585     // fall-thru
586     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
587     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
588     res.type = a.type;
589     break;
590     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
591     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
592     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
593     res.s = b.s;
594     res.type = b.type;
595     break;
596     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
597     res.type = ppc_fpr_zero;
598     break;
599     case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
600     res.type = ppc_fpr_Inf;
601     break;
602     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
603     case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
604     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
605     case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
606     res.type = ppc_fpr_NaN;
607     break;
608     }
609     }
610    
611     inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
612     {
613     switch (B.type) {
614     case ppc_fpr_norm:
615     if (B.s) {
616     D.type = ppc_fpr_NaN;
617     gCPU.fpscr |= FPSCR_VXSQRT;
618     break;
619     }
620     // D := 1/2(D_old + B/D_old)
621     D = B;
622     D.e /= 2;
623     for (int i=0; i<6; i++) {
624     ppc_double D_old = D;
625     ppc_double B_div_D_old;
626     ppc_fpu_div(B_div_D_old, B, D_old);
627     ppc_fpu_add(D, D_old, B_div_D_old);
628     D.e--;
629    
630     /* uint64 e;
631     ppc_double E = D;
632     ppc_fpu_pack_double(E, e);
633     printf("%.20f\n", *(double *)&e);*/
634     }
635     break;
636     case ppc_fpr_zero:
637     D.type = ppc_fpr_zero;
638     D.s = B.s;
639     break;
640     case ppc_fpr_Inf:
641     if (B.s) {
642     D.type = ppc_fpr_NaN;
643     gCPU.fpscr |= FPSCR_VXSQRT;
644     } else {
645     D.type = ppc_fpr_Inf;
646     D.s = 0;
647     }
648     break;
649     case ppc_fpr_NaN:
650     D.type = ppc_fpr_NaN;
651     break;
652     }
653     }
654    
655     void ppc_fpu_test()
656     {
657     double bb = 1.0;
658     uint64 b = *(uint64 *)&bb;
659     ppc_double B;
660     ppc_double D;
661     ppc_fpu_unpack_double(B, b);
662     ht_printf("%d\n", B.e);
663     ppc_fpu_sqrt(D, B);
664     uint64 d;
665     gCPU.fpscr |= ppc_fpu_pack_double(D, d);
666     printf("%f\n", *(double *)&d);
667     /* ppc_double A, B, C, D, E;
668     ppc_fpu_unpack_double(A, 0xc00fafcd6c40e500ULL);
669     ppc_fpu_unpack_double(B, 0xc00fafcd6c40e4beULL);
670     B.s ^= 1;
671     ppc_fpu_add(E, A, B);
672     uint64 e;
673     ppc_fpu_pack_double(E, e);
674     ht_printf("%qx\n", e);
675     ppc_fpu_add(D, E, B);*/
676    
677     /* ppc_double A, B, C;
678     double a, b, c;
679     A.type = B.type = ppc_fpr_norm;
680     A.s = 1;
681     A.e = 0;
682     A.m = 0;
683     A.m = ((1ULL<<56)-1)-((1ULL<<10)-1);
684     ht_printf("%qb\n", A.m);
685     B.s = 1;
686     B.e = 0;
687     B.m = 0;
688     B.m = ((1ULL<<56)-1)-((1ULL<<50)-1);
689     a = ppc_fpu_get_double(A);
690     b = ppc_fpu_get_double(B);
691     printf("%f + %f = \n", a, b);
692     ppc_fpu_add(C, A, B);
693     uint64 d;
694     uint32 s;
695     ppc_fpu_pack_double_as_single(C, d);
696     ht_printf("%064qb\n", d);
697     ppc_fpu_unpack_double(C, d);
698     ppc_fpu_pack_single(C, s);
699     ht_printf("single: %032b\n", s);
700     ppc_single Cs;
701     ppc_fpu_unpack_single(Cs, s);
702     ppc_fpu_single_to_double(Cs, C);
703     // ht_printf("%d\n", ppc_fpu_double_to_int(C));
704     c = ppc_fpu_get_double(C);
705     printf("%f\n", c);*/
706     }
707    
708     /*
709     * a and b must not be NaNs
710     */
711     inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b)
712     {
713     if (a.type == ppc_fpr_zero) {
714     if (b.type == ppc_fpr_zero) return 2;
715     return (b.s) ? 4: 8;
716     }
717     if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4;
718     if (a.s != b.s) return (a.s) ? 8: 4;
719     if (a.e > b.e) return (a.s) ? 8: 4;
720     if (a.e < b.e) return (a.s) ? 4: 8;
721     if (a.m > b.m) return (a.s) ? 8: 4;
722     if (a.m < b.m) return (a.s) ? 4: 8;
723     return 2;
724     }
725    
726     double ppc_fpu_get_double(uint64 d)
727     {
728     ppc_double dd;
729     ppc_fpu_unpack_double(dd, d);
730     return ppc_fpu_get_double(dd);
731     }
732    
733     double ppc_fpu_get_double(ppc_double &d)
734     {
735     if (d.type == ppc_fpr_norm) {
736     double r = d.m;
737     for (int i=0; i<55; i++) {
738     r = r / 2.0;
739     }
740     if (d.e < 0) {
741     for (int i=0; i>d.e; i--) {
742     r = r / 2.0;
743     }
744     } else if (d.e > 0) {
745     for (int i=0; i<d.e; i++) {
746     r = r * 2.0;
747     }
748     }
749     if (d.s) r = -r;
750     return r;
751     } else {
752     return 0.0;
753     }
754     }
755    
756     /***********************************************************************************
757     *
758     */
759    
// Exchanges the roles of the two source operands of a binary float op:
// swaps frA/frB, their register mappings a/b and the operation op with its
// reversed form rop. Expects all six names to be in scope at the point of use.
// No trailing semicolon here, so that "SWAP;" is exactly one statement.
#define SWAP do { \
	int tmpfr = frA; frA = frB; frB = tmpfr; \
	JitcFloatReg tmpreg = a; a = b; b = tmpreg; \
	X86FloatArithOp tmpop = op; op = rop; rop = tmpop; \
} while(0)
765    
766    
767     static void ppc_opc_gen_binary_floatop(X86FloatArithOp op, X86FloatArithOp rop, int frD, int frA, int frB)
768     {
769     jitcFloatRegisterClobberAll();
770     // jitcSetFPUPrecision(53);
771    
772     // ht_printf("binfloatop: %x: %d: %d, %d, %d\n", gJITC.pc, op, frD, frA, frB);
773     // op == st(i)/st(0)
774     // rop == st(0)/st(i)
775    
776     // op == st(0)/mem
777     // rop == mem/st(0)
778    
779     // frD := frA (op) frB = frB (rop) frA
780    
781     // make sure client float register aren't mapped to integer registers
782     jitcClobberClientRegisterForFloat(frA);
783     jitcClobberClientRegisterForFloat(frB);
784     jitcInvalidateClientRegisterForFloat(frD);
785    
786     JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
787     JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB);
788     // ht_printf("%d -> %d\n", frA, a);
789     // ht_printf("%d -> %d\n", frB, b);
790     if (a == JITC_FLOAT_REG_NONE && b != JITC_FLOAT_REG_NONE) {
791     // b is mapped but not a, swap them
792     SWAP;
793     }
794     if (a != JITC_FLOAT_REG_NONE) {
795     // a is mapped
796     if (frB == frD && frA != frD) {
797     // b = st(a) (op) b
798     // ht_printf("case a\n");
799     b = jitcGetClientFloatRegister(frB, a);
800     if (jitcFloatRegisterIsTOP(b)) {
801     asmFArith_ST0(op, jitcFloatRegisterToNative(a));
802     } else {
803     jitcFloatRegisterXCHGToFront(a);
804     asmFArith_STi(rop, jitcFloatRegisterToNative(b));
805     }
806     jitcFloatRegisterDirty(b);
807     } else if (frA == frD) {
808     // st(a) = st(a) (op) b
809     // ht_printf("case b\n");
810     b = jitcGetClientFloatRegister(frB, a);
811     if (jitcFloatRegisterIsTOP(b)) {
812     asmFArith_STi(op, jitcFloatRegisterToNative(a));
813     } else {
814     jitcFloatRegisterXCHGToFront(a);
815     asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
816     }
817     jitcFloatRegisterDirty(a);
818     } else {
819     // ht_printf("case c\n");
820     // frA != frD != frB (and frA is mapped, frD isn't mapped)
821     a = jitcFloatRegisterDup(a, b);
822     // ht_printf("%d\n", b);
823     // now a is TOP
824     if (b != JITC_FLOAT_REG_NONE) {
825     asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
826     } else {
827     modrm_o modrm;
828     asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
829     }
830     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
831     if (d == JITC_FLOAT_REG_NONE) {
832     jitcMapClientFloatRegisterDirty(frD, a);
833     } else {
834     jitcFloatRegisterStoreAndPopTOP(d);
835     jitcFloatRegisterDirty(d);
836     }
837     }
838     } else {
839     // ht_printf("case d\n");
840     // neither a nor b is mapped
841     if (frB == frD && frA != frD) {
842     // frB = frA (op) frB, none of them is mapped
843     b = jitcGetClientFloatRegister(frB);
844     jitcFloatRegisterDirty(b);
845     modrm_o modrm;
846     asmFArith(rop, x86_mem2(modrm, &gCPU.fpr[frA]));
847     return;
848     }
849     if (frA == frD) {
850     // frA = frA (op) frB, none of them is mapped
851     a = jitcGetClientFloatRegister(frA);
852     jitcFloatRegisterDirty(a);
853     modrm_o modrm;
854     asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
855     } else {
856     // frA != frD != frB (and frA, frB aren't mapped)
857     a = jitcGetClientFloatRegisterUnmapped(frA);
858     modrm_o modrm;
859     asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
860     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
861     if (d == JITC_FLOAT_REG_NONE) {
862     jitcMapClientFloatRegisterDirty(frD, a);
863     } else {
864     jitcFloatRegisterStoreAndPopTOP(d);
865     jitcFloatRegisterDirty(d);
866     }
867     }
868     }
869     }
870    
871     static inline void ppc_opc_gen_unary_floatop(X86FloatOp op, int frD, int frA)
872     {
873     jitcClobberClientRegisterForFloat(frA);
874     jitcInvalidateClientRegisterForFloat(frD);
875     if (frD == frA) {
876     JitcFloatReg a = jitcGetClientFloatRegister(frA);
877     jitcFloatRegisterDirty(a);
878     jitcFloatRegisterXCHGToFront(a);
879     } else {
880     JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
881     if (a == JITC_FLOAT_REG_NONE) {
882     a = jitcGetClientFloatRegisterUnmapped(frA);
883     } else {
884     a = jitcFloatRegisterDup(a);
885     }
886     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
887     if (d == JITC_FLOAT_REG_NONE) {
888     jitcMapClientFloatRegisterDirty(frD, a);
889     } else {
890     asmFSimple(op);
891     jitcFloatRegisterStoreAndPopTOP(d);
892     jitcFloatRegisterDirty(d);
893     return;
894     }
895     }
896     asmFSimple(op);
897     }
898    
899    
900     /*
901     fmadd FADD false
902     fmsub FSUB false
903     fnmadd FADD true
904     fnmsub FSUBR false
905     */
906    
907     static void ppc_opc_gen_ternary_floatop(X86FloatArithOp op, X86FloatArithOp rop, bool chs, int frD, int frA, int frC, int frB)
908     {
909     jitcFloatRegisterClobberAll();
910     // jitcSetFPUPrecision(64);
911     jitcClobberClientRegisterForFloat(frA);
912     jitcClobberClientRegisterForFloat(frC);
913     jitcClobberClientRegisterForFloat(frB);
914     jitcInvalidateClientRegisterForFloat(frD);
915    
916     JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
917     if (a != JITC_FLOAT_REG_NONE) {
918     ht_printf("askf lsa flsd\n");
919     a = jitcFloatRegisterDup(a, jitcGetClientFloatRegisterMapping(frC));
920     } else {
921     a = jitcGetClientFloatRegisterUnmapped(frA, jitcGetClientFloatRegisterMapping(frC), jitcGetClientFloatRegisterMapping(frB));
922     }
923     // a is TOP now
924     JitcFloatReg c = jitcGetClientFloatRegisterMapping(frC);
925     if (c != JITC_FLOAT_REG_NONE) {
926     asmFArith_ST0(X86_FMUL, jitcFloatRegisterToNative(c));
927     } else {
928     modrm_o modrm;
929     asmFArith(X86_FMUL, x86_mem2(modrm, &gCPU.fpr[frC]));
930     }
931     JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB);
932     if (b != JITC_FLOAT_REG_NONE) {
933     asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
934     } else {
935     modrm_o modrm;
936     asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
937     }
938     if (chs) {
939     asmFSimple(FCHS);
940     }
941     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
942     if (d == JITC_FLOAT_REG_NONE) {
943     jitcMapClientFloatRegisterDirty(frD, a);
944     } else {
945     jitcFloatRegisterStoreAndPopTOP(d);
946     jitcFloatRegisterDirty(d);
947     }
948     }
949    
950     #define JITC
951    
952     static void FASTCALL ppc_opc_gen_update_cr1_output_err(const char *err)
953     {
954     PPC_FPU_ERR("%s\n", err);
955     }
956    
957     static void ppc_opc_gen_update_cr1(const char *err)
958     {
959     asmALU(X86_MOV, EAX, (uint32)err);
960     asmCALL((NativeAddress)ppc_opc_gen_update_cr1_output_err);
961     }
962    
963     /*
964     * fabsx Floating Absolute Value
965     * .484
966     */
967     void ppc_opc_fabsx()
968     {
969     int frD, frA, frB;
970     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
971     PPC_OPC_ASSERT(frA==0);
972     gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT;
973     if (gCPU.current_opc & PPC_OPC_Rc) {
974     // update cr1 flags
975     PPC_FPU_ERR("fabs.\n");
976     }
977     }
978     JITCFlow ppc_opc_gen_fabsx()
979     {
980     int frD, frA, frB;
981     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
982     if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
983     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
984     if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
985     jitcClobberCarryAndFlags();
986     if (frD != frB) {
987     NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
988     NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
989     NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
990     NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
991     asmALU(X86_MOV, dh, bh);
992     asmALU(X86_MOV, dl, bl);
993     asmALU(X86_AND, dh, 0x7fffffff);
994     } else {
995     NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
996     asmALU(X86_AND, b, 0x7fffffff);
997     }
998     } else {
999     ppc_opc_gen_unary_floatop(FABS, frD, frB);
1000     }
1001     if (gJITC.current_opc & PPC_OPC_Rc) {
1002     // update cr1 flags
1003     ppc_opc_gen_update_cr1("fabs.\n");
1004     }
1005     return flowContinue;
1006     }
1007     /*
1008     * faddx Floating Add (Double-Precision)
1009     * .485
1010     */
1011     void ppc_opc_faddx()
1012     {
1013     int frD, frA, frB, frC;
1014     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1015     PPC_OPC_ASSERT(frC==0);
1016     ppc_double A, B, D;
1017     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1018     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1019     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1020     gCPU.fpscr |= FPSCR_VXISI;
1021     }
1022     ppc_fpu_add(D, A, B);
1023     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1024     if (gCPU.current_opc & PPC_OPC_Rc) {
1025     // update cr1 flags
1026     PPC_FPU_ERR("fadd.\n");
1027     }
1028     }
1029     JITCFlow ppc_opc_gen_faddx()
1030     {
1031     #ifdef JITC
1032     int frD, frA, frB, frC;
1033     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1034     PPC_OPC_ASSERT(frC==0);
1035     ppc_opc_gen_binary_floatop(X86_FADD, X86_FADD, frD, frA, frB);
1036     if (gJITC.current_opc & PPC_OPC_Rc) {
1037     // update cr1 flags
1038     ppc_opc_gen_update_cr1("fadd.\n");
1039     }
1040     return flowContinue;
1041     #else
1042     ppc_opc_gen_interpret(ppc_opc_faddx);
1043     return flowEndBlock;
1044     #endif
1045     }
1046     /*
1047     * faddsx Floating Add Single
1048     * .486
1049     */
1050     void ppc_opc_faddsx()
1051     {
1052     int frD, frA, frB, frC;
1053     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1054     PPC_OPC_ASSERT(frC==0);
1055     ppc_double A, B, D;
1056     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1057     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1058     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1059     gCPU.fpscr |= FPSCR_VXISI;
1060     }
1061     ppc_fpu_add(D, A, B);
1062     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1063     if (gCPU.current_opc & PPC_OPC_Rc) {
1064     // update cr1 flags
1065     PPC_FPU_ERR("fadds.\n");
1066     }
1067     }
1068     JITCFlow ppc_opc_gen_faddsx()
1069     {
1070     ppc_opc_gen_interpret(ppc_opc_faddsx);
1071     return flowEndBlock;
1072     }
1073     /*
1074     * fcmpo Floating Compare Ordered
1075     * .488
1076     */
1077     static uint32 ppc_fpu_cmp_and_mask[8] = {
1078     0xfffffff0,
1079     0xffffff0f,
1080     0xfffff0ff,
1081     0xffff0fff,
1082     0xfff0ffff,
1083     0xff0fffff,
1084     0xf0ffffff,
1085     0x0fffffff,
1086     };
1087     void ppc_opc_fcmpo()
1088     {
1089     int crfD, frA, frB;
1090     PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
1091     crfD >>= 2;
1092     ppc_double A, B;
1093     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1094     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1095     uint32 cmp;
1096     if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
1097     gCPU.fpscr |= FPSCR_VXSNAN;
1098     /*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC;
1099     cmp = 1;
1100     } else {
1101     cmp = ppc_fpu_compare(A, B);
1102     }
1103     crfD = 7-crfD;
1104     gCPU.fpscr &= ~0x1f000;
1105     gCPU.fpscr |= (cmp << 12);
1106     gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
1107     gCPU.cr |= (cmp << (crfD * 4));
1108     }
1109     JITCFlow ppc_opc_gen_fcmpo()
1110     {
1111     ppc_opc_gen_interpret(ppc_opc_fcmpo);
1112     return flowEndBlock;
1113     }
1114     /*
1115     * fcmpu Floating Compare Unordered
1116     * .489
1117     */
1118     void ppc_opc_fcmpu()
1119     {
1120     int crfD, frA, frB;
1121     PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
1122     crfD >>= 2;
1123     ppc_double A, B;
1124     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1125     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1126     uint32 cmp;
1127     if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
1128     gCPU.fpscr |= FPSCR_VXSNAN;
1129     cmp = 1;
1130     } else {
1131     cmp = ppc_fpu_compare(A, B);
1132     }
1133     crfD = 7-crfD;
1134     gCPU.fpscr &= ~0x1f000;
1135     gCPU.fpscr |= (cmp << 12);
1136     gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
1137     gCPU.cr |= (cmp << (crfD * 4));
1138     }
1139     JITCFlow ppc_opc_gen_fcmpu()
1140     {
1141     ppc_opc_gen_interpret(ppc_opc_fcmpu);
1142     return flowEndBlock;
1143     }
1144     /*
1145     * fctiwx Floating Convert to Integer Word
1146     * .492
1147     */
1148     void ppc_opc_fctiwx()
1149     {
1150     int frD, frA, frB;
1151     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1152     PPC_OPC_ASSERT(frA==0);
1153     ppc_double B;
1154     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1155     gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
1156     if (gCPU.current_opc & PPC_OPC_Rc) {
1157     // update cr1 flags
1158     PPC_FPU_ERR("fctiw.\n");
1159     }
1160     }
1161     JITCFlow ppc_opc_gen_fctiwx()
1162     {
1163     int frD, frA, frB;
1164     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1165     PPC_OPC_ASSERT(frA==0);
1166     jitcClobberClientRegisterForFloat(frB);
1167     jitcInvalidateClientRegisterForFloat(frD);
1168    
1169     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1170     if (frB != frD && d != JITC_FLOAT_REG_NONE) {
1171     jitcFloatRegisterXCHGToFront(d);
1172     asmFFREEP(Float_ST0);
1173     gJITC.nativeFloatRegState[d] = rsUnused;
1174     gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
1175     gJITC.nativeFloatTOP--;
1176     }
1177    
1178     modrm_o modrm;
1179     JitcFloatReg b = jitcGetClientFloatRegisterUnmapped(frB);
1180     asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
1181     gJITC.nativeFloatRegState[b] = rsUnused;
1182     gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
1183     gJITC.nativeFloatTOP--;
1184    
1185     if (gJITC.current_opc & PPC_OPC_Rc) {
1186     // update cr1 flags
1187     ppc_opc_gen_update_cr1("fctiw.\n");
1188     }
1189     return flowContinue;
1190     }
1191     /*
1192     * fctiwzx Floating Convert to Integer Word with Round toward Zero
1193     * .493
1194     */
1195     void ppc_opc_fctiwzx()
1196     {
1197     int frD, frA, frB;
1198     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1199     PPC_OPC_ASSERT(frA==0);
1200     uint32 oldfpscr = gCPU.fpscr;
1201     gCPU.fpscr &= ~3;
1202     gCPU.fpscr |= 1;
1203     ppc_double B;
1204     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1205     gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
1206     gCPU.fpscr = oldfpscr;
1207     if (gCPU.current_opc & PPC_OPC_Rc) {
1208     // update cr1 flags
1209     PPC_FPU_ERR("fctiwz.\n");
1210     }
1211     }
1212     JITCFlow ppc_opc_gen_fctiwzx()
1213     {
1214     int frD, frA, frB;
1215     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1216     PPC_OPC_ASSERT(frA==0);
1217    
1218     static uint16 cw = 0xfff;
1219    
1220     modrm_o modrm;
1221     if (!gJITC.hostCPUCaps.sse3) {
1222     asmFLDCW(x86_mem2(modrm, &cw));
1223     }
1224    
1225     jitcClobberClientRegisterForFloat(frB);
1226     jitcInvalidateClientRegisterForFloat(frD);
1227    
1228     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1229     if (frB != frD && d != JITC_FLOAT_REG_NONE) {
1230     jitcFloatRegisterXCHGToFront(d);
1231     asmFFREEP(Float_ST0);
1232     gJITC.nativeFloatRegState[d] = rsUnused;
1233     gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
1234     gJITC.nativeFloatTOP--;
1235     }
1236    
1237     JitcFloatReg b = jitcGetClientFloatRegisterUnmapped(frB);
1238     if (gJITC.hostCPUCaps.sse3) {
1239     asmFISTTP(x86_mem2(modrm, &gCPU.fpr[frD]));
1240     } else {
1241     asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
1242     }
1243     gJITC.nativeFloatRegState[b] = rsUnused;
1244     gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
1245     gJITC.nativeFloatTOP--;
1246    
1247     if (!gJITC.hostCPUCaps.sse3) {
1248     asmFLDCW(x86_mem2(modrm, &gCPU.x87cw));
1249     }
1250    
1251     if (gJITC.current_opc & PPC_OPC_Rc) {
1252     // update cr1 flags
1253     ppc_opc_gen_update_cr1("fctiwz.\n");
1254     }
1255     return flowContinue;
1256     }
1257     /*
1258     * fdivx Floating Divide (Double-Precision)
1259     * .494
1260     */
1261     void ppc_opc_fdivx()
1262     {
1263     int frD, frA, frB, frC;
1264     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1265     PPC_OPC_ASSERT(frC==0);
1266     ppc_double A, B, D;
1267     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1268     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1269     if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
1270     gCPU.fpscr |= FPSCR_VXZDZ;
1271     }
1272     if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1273     gCPU.fpscr |= FPSCR_VXIDI;
1274     }
1275     if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
1276     // FIXME::
1277     gCPU.fpscr |= FPSCR_VXIDI;
1278     }
1279     ppc_fpu_div(D, A, B);
1280     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1281     if (gCPU.current_opc & PPC_OPC_Rc) {
1282     // update cr1 flags
1283     PPC_FPU_ERR("fdiv.\n");
1284     }
1285     }
1286     JITCFlow ppc_opc_gen_fdivx()
1287     {
1288     #ifndef JITC
1289     ppc_opc_gen_interpret(ppc_opc_fdivx);
1290     return flowEndBlock;
1291     #else
1292     int frD, frA, frB, frC;
1293     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1294     PPC_OPC_ASSERT(frC==0);
1295     ppc_opc_gen_binary_floatop(X86_FDIV, X86_FDIVR, frD, frA, frB);
1296     if (gJITC.current_opc & PPC_OPC_Rc) {
1297     // update cr1 flags
1298     ppc_opc_gen_update_cr1("fdiv.\n");
1299     }
1300     return flowContinue;
1301     #endif
1302     }
1303     /*
1304     * fdivsx Floating Divide Single
1305     * .495
1306     */
1307     void ppc_opc_fdivsx()
1308     {
1309     int frD, frA, frB, frC;
1310     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1311     PPC_OPC_ASSERT(frC==0);
1312     ppc_double A, B, D;
1313     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1314     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1315     if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
1316     gCPU.fpscr |= FPSCR_VXZDZ;
1317     }
1318     if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1319     gCPU.fpscr |= FPSCR_VXIDI;
1320     }
1321     if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
1322     // FIXME::
1323     gCPU.fpscr |= FPSCR_VXIDI;
1324     }
1325     ppc_fpu_div(D, A, B);
1326     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1327     if (gCPU.current_opc & PPC_OPC_Rc) {
1328     // update cr1 flags
1329     PPC_FPU_ERR("fdivs.\n");
1330     }
1331     }
1332     JITCFlow ppc_opc_gen_fdivsx()
1333     {
1334     ppc_opc_gen_interpret(ppc_opc_fdivsx);
1335     return flowEndBlock;
1336     }
1337     /*
1338     * fmaddx Floating Multiply-Add (Double-Precision)
1339     * .496
1340     */
1341     void ppc_opc_fmaddx()
1342     {
1343     int frD, frA, frB, frC;
1344     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1345     ppc_double A, B, C, D;
1346     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1347     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1348     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1349     ppc_fpu_mul_add(D, A, C, B);
1350     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1351     if (gCPU.current_opc & PPC_OPC_Rc) {
1352     // update cr1 flags
1353     PPC_FPU_ERR("fmadd.\n");
1354     }
1355     }
1356     JITCFlow ppc_opc_gen_fmaddx()
1357     {
1358     int frD, frA, frB, frC;
1359     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1360     ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, false, frD, frA, frC, frB);
1361     if (gJITC.current_opc & PPC_OPC_Rc) {
1362     // update cr1 flags
1363     ppc_opc_gen_update_cr1("fmadd.\n");
1364     }
1365     return flowContinue;
1366     }
1367     /*
1368     * fmaddsx Floating Multiply-Add Single
1369     * .497
1370     */
1371     void ppc_opc_fmaddsx()
1372     {
1373     int frD, frA, frB, frC;
1374     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1375     ppc_double A, B, C, D;
1376     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1377     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1378     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1379     ppc_fpu_mul_add(D, A, C, B);
1380     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1381     if (gCPU.current_opc & PPC_OPC_Rc) {
1382     // update cr1 flags
1383     PPC_FPU_ERR("fmadds.\n");
1384     }
1385     }
1386     JITCFlow ppc_opc_gen_fmaddsx()
1387     {
1388     ppc_opc_gen_interpret(ppc_opc_fmaddsx);
1389     return flowEndBlock;
1390     }
1391     /*
1392     * fmrx Floating Move Register
1393     * .498
1394     */
1395     void ppc_opc_fmrx()
1396     {
1397     int frD, rA, frB;
1398     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB);
1399     PPC_OPC_ASSERT(rA==0);
1400     gCPU.fpr[frD] = gCPU.fpr[frB];
1401     if (gCPU.current_opc & PPC_OPC_Rc) {
1402     // update cr1 flags
1403     PPC_FPU_ERR("fmr.\n");
1404     }
1405     }
1406     JITCFlow ppc_opc_gen_fmrx()
1407     {
1408     int frD, frA, frB;
1409     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1410     if (frD != frB) {
1411     JitcFloatReg a = jitcGetClientFloatRegisterMapping(frB);
1412     if (a == JITC_FLOAT_REG_NONE) {
1413     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1414     if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
1415     NativeReg bu = jitcGetClientRegister(PPC_FPR_U(frB));
1416     NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1417     NativeReg du = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1418     NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1419     asmALU(X86_MOV, du, bu);
1420     asmALU(X86_MOV, dl, bl);
1421     } else {
1422     jitcInvalidateClientRegisterForFloat(frD);
1423     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1424     if (d == JITC_FLOAT_REG_NONE) {
1425     d = jitcFloatRegisterDup(a);
1426     jitcMapClientFloatRegisterDirty(frD, d);
1427     } else {
1428     jitcFloatRegisterXCHGToFront(a);
1429     asmFST(jitcFloatRegisterToNative(d));
1430     jitcFloatRegisterDirty(d);
1431     }
1432     }
1433     }
1434     if (gJITC.current_opc & PPC_OPC_Rc) {
1435     // update cr1 flags
1436     ppc_opc_gen_update_cr1("fabs.\n");
1437     }
1438     return flowContinue;
1439     }
1440     /*
1441     * fmsubx Floating Multiply-Subtract (Double-Precision)
1442     * .499
1443     */
1444     void ppc_opc_fmsubx()
1445     {
1446     int frD, frA, frB, frC;
1447     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1448     ppc_double A, B, C, D;
1449     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1450     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1451     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1452     B.s ^= 1;
1453     ppc_fpu_mul_add(D, A, C, B);
1454     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1455     if (gCPU.current_opc & PPC_OPC_Rc) {
1456     // update cr1 flags
1457     PPC_FPU_ERR("fmsub.\n");
1458     }
1459     }
1460     JITCFlow ppc_opc_gen_fmsubx()
1461     {
1462     int frD, frA, frB, frC;
1463     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1464     ppc_opc_gen_ternary_floatop(X86_FSUB, X86_FSUBR, false, frD, frA, frC, frB);
1465     if (gJITC.current_opc & PPC_OPC_Rc) {
1466     // update cr1 flags
1467     ppc_opc_gen_update_cr1("fmsub.\n");
1468     }
1469     return flowContinue;
1470     }
1471     /*
1472     * fmsubsx Floating Multiply-Subtract Single
1473     * .500
1474     */
1475     void ppc_opc_fmsubsx()
1476     {
1477     int frD, frA, frB, frC;
1478     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1479     ppc_double A, B, C, D;
1480     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1481     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1482     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1483     B.s ^= 1;
1484     ppc_fpu_mul_add(D, A, C, B);
1485     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1486     if (gCPU.current_opc & PPC_OPC_Rc) {
1487     // update cr1 flags
1488     PPC_FPU_ERR("fmsubs.\n");
1489     }
1490     }
1491     JITCFlow ppc_opc_gen_fmsubsx()
1492     {
1493     ppc_opc_gen_interpret(ppc_opc_fmsubsx);
1494     return flowEndBlock;
1495     }
1496     /*
1497     * fmulx Floating Multiply (Double-Precision)
1498     * .501
1499     */
1500     void ppc_opc_fmulx()
1501     {
1502     int frD, frA, frB, frC;
1503     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1504     PPC_OPC_ASSERT(frB==0);
1505     ppc_double A, C, D;
1506     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1507     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1508     if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1509     || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1510     gCPU.fpscr |= FPSCR_VXIMZ;
1511     }
1512     ppc_fpu_mul(D, A, C);
1513     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1514     if (gCPU.current_opc & PPC_OPC_Rc) {
1515     // update cr1 flags
1516     PPC_FPU_ERR("fmul.\n");
1517     }
1518     }
1519     JITCFlow ppc_opc_gen_fmulx()
1520     {
1521     #ifdef JITC
1522     int frD, frA, frB, frC;
1523     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1524     PPC_OPC_ASSERT(frB==0);
1525     ppc_opc_gen_binary_floatop(X86_FMUL, X86_FMUL, frD, frA, frC);
1526     if (gJITC.current_opc & PPC_OPC_Rc) {
1527     // update cr1 flags
1528     ppc_opc_gen_update_cr1("fmul.\n");
1529     }
1530     return flowContinue;
1531     #else
1532     ppc_opc_gen_interpret(ppc_opc_fmulx);
1533     return flowEndBlock;
1534     #endif
1535     }
1536     /*
1537     * fmulsx Floating Multiply Single
1538     * .502
1539     */
1540     void ppc_opc_fmulsx()
1541     {
1542     int frD, frA, frB, frC;
1543     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1544     PPC_OPC_ASSERT(frB==0);
1545     ppc_double A, C, D;
1546     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1547     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1548     if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1549     || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1550     gCPU.fpscr |= FPSCR_VXIMZ;
1551     }
1552     ppc_fpu_mul(D, A, C);
1553     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1554     if (gCPU.current_opc & PPC_OPC_Rc) {
1555     // update cr1 flags
1556     PPC_FPU_ERR("fmuls.\n");
1557     }
1558     }
1559     JITCFlow ppc_opc_gen_fmulsx()
1560     {
1561     ppc_opc_gen_interpret(ppc_opc_fmulsx);
1562     return flowEndBlock;
1563     }
1564     /*
1565     * fnabsx Floating Negative Absolute Value
1566     * .503
1567     */
1568     void ppc_opc_fnabsx()
1569     {
1570     int frD, frA, frB;
1571     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1572     PPC_OPC_ASSERT(frA==0);
1573     gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT;
1574     if (gCPU.current_opc & PPC_OPC_Rc) {
1575     // update cr1 flags
1576     PPC_FPU_ERR("fnabs.\n");
1577     }
1578     }
1579     JITCFlow ppc_opc_gen_fnabsx()
1580     {
1581     int frD, frA, frB;
1582     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1583     if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
1584     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1585     if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
1586     jitcClobberCarryAndFlags();
1587     if (frD != frB) {
1588     NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
1589     NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1590     NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1591     NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1592     asmALU(X86_MOV, dh, bh);
1593     asmALU(X86_MOV, dl, bl);
1594     asmALU(X86_OR, dh, 0x80000000);
1595     } else {
1596     NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
1597     asmALU(X86_OR, b, 0x80000000);
1598     }
1599     } else {
1600     ppc_opc_gen_unary_floatop(FABS, frD, frB);
1601     asmFSimple(FCHS);
1602     }
1603     if (gJITC.current_opc & PPC_OPC_Rc) {
1604     // update cr1 flags
1605     ppc_opc_gen_update_cr1("fnabs.\n");
1606     }
1607     return flowContinue;
1608     }
1609     /*
1610     * fnegx Floating Negate
1611     * .504
1612     */
1613     void ppc_opc_fnegx()
1614     {
1615     int frD, frA, frB;
1616     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1617     PPC_OPC_ASSERT(frA==0);
1618     gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT;
1619     if (gCPU.current_opc & PPC_OPC_Rc) {
1620     // update cr1 flags
1621     PPC_FPU_ERR("fneg.\n");
1622     }
1623     }
1624     JITCFlow ppc_opc_gen_fnegx()
1625     {
1626     int frD, frA, frB;
1627     PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1628     if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
1629     JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1630     if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
1631     jitcClobberCarryAndFlags();
1632     if (frD != frB) {
1633     NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
1634     NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1635     NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1636     NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1637     asmALU(X86_MOV, dh, bh);
1638     asmALU(X86_MOV, dl, bl);
1639     asmALU(X86_XOR, dh, 0x80000000);
1640     } else {
1641     NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
1642     asmALU(X86_XOR, b, 0x80000000);
1643     }
1644     } else {
1645     ppc_opc_gen_unary_floatop(FCHS, frD, frB);
1646     }
1647     if (gJITC.current_opc & PPC_OPC_Rc) {
1648     // update cr1 flags
1649     ppc_opc_gen_update_cr1("fneg.\n");
1650     }
1651     return flowContinue;
1652     }
1653     /*
1654     * fnmaddx Floating Negative Multiply-Add (Double-Precision)
1655     * .505
1656     */
1657     void ppc_opc_fnmaddx()
1658     {
1659     int frD, frA, frB, frC;
1660     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1661     ppc_double A, B, C, D;
1662     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1663     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1664     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1665     ppc_fpu_mul_add(D, A, C, B);
1666     D.s ^= 1;
1667     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1668     if (gCPU.current_opc & PPC_OPC_Rc) {
1669     // update cr1 flags
1670     PPC_FPU_ERR("fnmadd.\n");
1671     }
1672     }
1673     JITCFlow ppc_opc_gen_fnmaddx()
1674     {
1675     int frD, frA, frB, frC;
1676     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1677     ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, true, frD, frA, frC, frB);
1678     if (gJITC.current_opc & PPC_OPC_Rc) {
1679     // update cr1 flags
1680     ppc_opc_gen_update_cr1("fnmadd.\n");
1681     }
1682     return flowContinue;
1683     }
1684     /*
1685     * fnmaddsx Floating Negative Multiply-Add Single
1686     * .506
1687     */
1688     void ppc_opc_fnmaddsx()
1689     {
1690     int frD, frA, frB, frC;
1691     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1692     ppc_double A, B, C, D;
1693     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1694     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1695     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1696     ppc_fpu_mul_add(D, A, C, B);
1697     D.s ^= 1;
1698     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1699     if (gCPU.current_opc & PPC_OPC_Rc) {
1700     // update cr1 flags
1701     PPC_FPU_ERR("fnmadds.\n");
1702     }
1703     }
1704     JITCFlow ppc_opc_gen_fnmaddsx()
1705     {
1706     ppc_opc_gen_interpret(ppc_opc_fnmaddsx);
1707     return flowEndBlock;
1708     }
1709     /*
1710     * fnmsubx Floating Negative Multiply-Subtract (Double-Precision)
1711     * .507
1712     */
1713     void ppc_opc_fnmsubx()
1714     {
1715     int frD, frA, frB, frC;
1716     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1717     ppc_double A, B, C, D;
1718     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1719     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1720     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1721     B.s ^= 1;
1722     ppc_fpu_mul_add(D, A, C, B);
1723     D.s ^= 1;
1724     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1725     if (gCPU.current_opc & PPC_OPC_Rc) {
1726     // update cr1 flags
1727     PPC_FPU_ERR("fnmsub.\n");
1728     }
1729     }
1730     JITCFlow ppc_opc_gen_fnmsubx()
1731     {
1732     int frD, frA, frB, frC;
1733     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1734     ppc_opc_gen_ternary_floatop(X86_FSUBR, X86_FSUB, false, frD, frA, frC, frB);
1735     if (gJITC.current_opc & PPC_OPC_Rc) {
1736     // update cr1 flags
1737     ppc_opc_gen_update_cr1("fnmsub.\n");
1738     }
1739     return flowContinue;
1740     }
1741     /*
1742     * fnmsubsx Floating Negative Multiply-Subtract Single
1743     * .508
1744     */
1745     void ppc_opc_fnmsubsx()
1746     {
1747     int frD, frA, frB, frC;
1748     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1749     ppc_double A, B, C, D;
1750     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1751     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1752     ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1753     B.s ^= 1;
1754     ppc_fpu_mul_add(D, A, C, B);
1755     D.s ^= 1;
1756     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1757     if (gCPU.current_opc & PPC_OPC_Rc) {
1758     // update cr1 flags
1759     PPC_FPU_ERR("fnmsubs.\n");
1760     }
1761     }
1762     JITCFlow ppc_opc_gen_fnmsubsx()
1763     {
1764     ppc_opc_gen_interpret(ppc_opc_fnmsubsx);
1765     return flowEndBlock;
1766     }
1767     /*
1768     * fresx Floating Reciprocal Estimate Single
1769     * .509
1770     */
1771     void ppc_opc_fresx()
1772     {
1773     int frD, frA, frB, frC;
1774     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1775     PPC_OPC_ASSERT(frA==0 && frC==0);
1776     if (gCPU.current_opc & PPC_OPC_Rc) {
1777     // update cr1 flags
1778     PPC_FPU_ERR("fres.\n");
1779     }
1780     PPC_FPU_ERR("fres\n");
1781     }
1782     JITCFlow ppc_opc_gen_fresx()
1783     {
1784     ppc_opc_gen_interpret(ppc_opc_fresx);
1785     return flowEndBlock;
1786     }
1787     /*
1788     * frspx Floating Round to Single
1789     * .511
1790     */
1791     void ppc_opc_frspx()
1792     {
1793     int frD, frA, frB;
1794     PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1795     PPC_OPC_ASSERT(frA==0);
1796     ppc_double B;
1797     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1798     gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]);
1799     if (gCPU.current_opc & PPC_OPC_Rc) {
1800     // update cr1 flags
1801     PPC_FPU_ERR("frsp.\n");
1802     }
1803     }
1804     JITCFlow ppc_opc_gen_frspx()
1805     {
1806     ppc_opc_gen_interpret(ppc_opc_frspx);
1807     return flowEndBlock;
1808     }
1809     /*
1810     * frsqrtex Floating Reciprocal Square Root Estimate
1811     * .512
1812     */
1813     void ppc_opc_frsqrtex()
1814     {
1815     int frD, frA, frB, frC;
1816     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1817     PPC_OPC_ASSERT(frA==0 && frC==0);
1818     ppc_double B;
1819     ppc_double D;
1820     ppc_double E;
1821     ppc_double Q;
1822     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1823     ppc_fpu_sqrt(Q, B);
1824     E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL;
1825     ppc_fpu_div(D, E, Q);
1826     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1827     if (gCPU.current_opc & PPC_OPC_Rc) {
1828     // update cr1 flags
1829     PPC_FPU_ERR("frsqrte.\n");
1830     }
1831     }
1832     JITCFlow ppc_opc_gen_frsqrtex()
1833     {
1834     int frD, frA, frB, frC;
1835     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1836     PPC_OPC_ASSERT(frA==0 && frC==0);
1837     ppc_opc_gen_unary_floatop(FSQRT, frD, frB);
1838     if (gJITC.nativeFloatTOP == 8) {
1839     jitcPopFloatStack(jitcGetClientFloatRegisterMapping(frD), JITC_FLOAT_REG_NONE);
1840     }
1841     gJITC.nativeFloatTOP++;
1842     asmFSimple(FLD1);
1843     asmFArithP_STi(X86_FDIVR, jitcFloatRegisterToNative(jitcGetClientFloatRegisterMapping(frD)));
1844     gJITC.nativeFloatTOP--;
1845     if (gJITC.current_opc & PPC_OPC_Rc) {
1846     // update cr1 flags
1847     ppc_opc_gen_update_cr1("frsqrte.\n");
1848     }
1849     return flowContinue;
1850     }
1851     /*
1852     * fselx Floating Select
1853     * .514
1854     */
1855     void ppc_opc_fselx()
1856     {
1857     int frD, frA, frB, frC;
1858     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1859     ppc_double A;
1860     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1861     if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) {
1862     gCPU.fpr[frD] = gCPU.fpr[frB];
1863     } else {
1864     gCPU.fpr[frD] = gCPU.fpr[frC];
1865     }
1866     if (gCPU.current_opc & PPC_OPC_Rc) {
1867     // update cr1 flags
1868     PPC_FPU_ERR("fsel.\n");
1869     }
1870     }
1871     JITCFlow ppc_opc_gen_fselx()
1872     {
1873     ppc_opc_gen_interpret(ppc_opc_fselx);
1874     return flowEndBlock;
1875     }
1876     /*
1877     * fsqrtx Floating Square Root (Double-Precision)
1878     * .515
1879     */
1880     void ppc_opc_fsqrtx()
1881     {
1882     int frD, frA, frB, frC;
1883     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1884     PPC_OPC_ASSERT(frA==0 && frC==0);
1885     ppc_double B;
1886     ppc_double D;
1887     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1888     ppc_fpu_sqrt(D, B);
1889     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1890     if (gCPU.current_opc & PPC_OPC_Rc) {
1891     // update cr1 flags
1892     PPC_FPU_ERR("fsqrt.\n");
1893     }
1894     }
1895     JITCFlow ppc_opc_gen_fsqrtx()
1896     {
1897     int frD, frA, frB, frC;
1898     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1899     PPC_OPC_ASSERT(frA==0 && frC==0);
1900     ppc_opc_gen_unary_floatop(FSQRT, frD, frB);
1901     if (gJITC.current_opc & PPC_OPC_Rc) {
1902     // update cr1 flags
1903     ppc_opc_gen_update_cr1("fsqrt.\n");
1904     }
1905     return flowContinue;
1906     }
1907     /*
1908     * fsqrtsx Floating Square Root Single
1909     * .515
1910     */
1911     void ppc_opc_fsqrtsx()
1912     {
1913     int frD, frA, frB, frC;
1914     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1915     PPC_OPC_ASSERT(frA==0 && frC==0);
1916     if (gCPU.current_opc & PPC_OPC_Rc) {
1917     // update cr1 flags
1918     PPC_FPU_ERR("fsqrts.\n");
1919     }
1920     PPC_FPU_ERR("fsqrts\n");
1921     }
1922     JITCFlow ppc_opc_gen_fsqrtsx()
1923     {
1924     ppc_opc_gen_interpret(ppc_opc_fsqrtsx);
1925     return flowEndBlock;
1926     }
1927     /*
1928     * fsubx Floating Subtract (Double-Precision)
1929     * .517
1930     */
1931     void ppc_opc_fsubx()
1932     {
1933     int frD, frA, frB, frC;
1934     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1935     PPC_OPC_ASSERT(frC==0);
1936     ppc_double A, B, D;
1937     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1938     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1939     if (B.type != ppc_fpr_NaN) {
1940     B.s ^= 1;
1941     }
1942     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1943     gCPU.fpscr |= FPSCR_VXISI;
1944     }
1945     ppc_fpu_add(D, A, B);
1946     gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1947     if (gCPU.current_opc & PPC_OPC_Rc) {
1948     // update cr1 flags
1949     PPC_FPU_ERR("fsub.\n");
1950     }
1951     }
1952     JITCFlow ppc_opc_gen_fsubx()
1953     {
1954     #ifdef JITC
1955     int frD, frA, frB, frC;
1956     PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1957     PPC_OPC_ASSERT(frC==0);
1958     ppc_opc_gen_binary_floatop(X86_FSUB, X86_FSUBR, frD, frA, frB);
1959     /*
1960     * FIXME: This solves the a floating point bug.
1961     * I have no idea why.
1962     */
1963     jitcFloatRegisterClobberAll();
1964     if (gJITC.current_opc & PPC_OPC_Rc) {
1965     // update cr1 flags
1966     ppc_opc_gen_update_cr1("fsub.\n");
1967     }
1968     return flowContinue;
1969     #else
1970     ppc_opc_gen_interpret(ppc_opc_fsubx);
1971     return flowEndBlock;
1972     #endif
1973     }
1974     /*
1975     * fsubsx Floating Subtract Single
1976     * .518
1977     */
1978     void ppc_opc_fsubsx()
1979     {
1980     int frD, frA, frB, frC;
1981     PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1982     PPC_OPC_ASSERT(frC==0);
1983     ppc_double A, B, D;
1984     ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1985     ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1986     if (B.type != ppc_fpr_NaN) {
1987     B.s ^= 1;
1988     }
1989     if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1990     gCPU.fpscr |= FPSCR_VXISI;
1991     }
1992     ppc_fpu_add(D, A, B);
1993     gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1994     if (gCPU.current_opc & PPC_OPC_Rc) {
1995     // update cr1 flags
1996     PPC_FPU_ERR("fsubs.\n");
1997     }
1998     }
1999     JITCFlow ppc_opc_gen_fsubsx()
2000     {
2001     ppc_opc_gen_interpret(ppc_opc_fsubsx);
2002     return flowEndBlock;
2003     }
2004    

  ViewVC Help
Powered by ViewVC 1.1.26