/[pearpc]/src/cpu/cpu_jitc_x86/ppc_fpu.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_jitc_x86/ppc_fpu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 51920 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_fpu.cc
4 *
5 * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6 * Copyright (C) 2003, 2004 Stefan Weyergraf
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include "debug/tracers.h"
23 #include "ppc_cpu.h"
24 #include "ppc_dec.h"
25 #include "ppc_fpu.h"
26
27 // .121
28 #define PPC_FPR_TYPE2(a,b) (((a)<<8)|(b))
29
30 const char *ppc_fpu_get_fpr_type(ppc_fpr_type t)
31 {
32 switch (t) {
33 case ppc_fpr_norm: return "norm";
34 case ppc_fpr_zero: return "zero";
35 case ppc_fpr_NaN: return "NaN";
36 case ppc_fpr_Inf: return "Inf";
37 default: return "???";
38 }
39 }
40
41 inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
42 {
43 switch (PPC_FPR_TYPE2(a.type, b.type)) {
44 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
45 int diff = a.e - b.e;
46 if (diff<0) {
47 diff = -diff;
48 if (diff <= 56) {
49 a.m >>= diff;
50 } else if (a.m != 0) {
51 a.m = 1;
52 } else {
53 a.m = 0;
54 }
55 res.e = b.e;
56 } else {
57 if (diff <= 56) {
58 b.m >>= diff;
59 } else if (b.m != 0) {
60 b.m = 1;
61 } else {
62 b.m = 0;
63 }
64 res.e = a.e;
65 }
66 res.type = ppc_fpr_norm;
67 if (a.s == b.s) {
68 res.s = a.s;
69 res.m = a.m + b.m;
70 if (res.m & (1ULL<<56)) {
71 res.m >>= 1;
72 res.e++;
73 }
74 } else {
75 res.s = a.s;
76 res.m = a.m - b.m;
77 if (!res.m) {
78 if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
79 res.s |= b.s;
80 } else {
81 res.s &= b.s;
82 }
83 res.type = ppc_fpr_zero;
84 } else {
85 if ((sint64)res.m < 0) {
86 res.m = b.m - a.m;
87 res.s = b.s;
88 }
89 diff = ppc_fpu_normalize(res) - 8;
90 res.e -= diff;
91 res.m <<= diff;
92 }
93 }
94 break;
95 }
96 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
97 res.s = a.s;
98 res.type = ppc_fpr_NaN;
99 break;
100 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
101 res.e = a.e;
102 // fall-thru
103 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
104 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
105 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
106 res.s = a.s;
107 res.m = a.m;
108 res.type = a.type;
109 break;
110 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
111 res.e = b.e;
112 // fall-thru
113 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
114 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
115 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
116 res.s = b.s;
117 res.m = b.m;
118 res.type = b.type;
119 break;
120 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
121 if (a.s != b.s) {
122 // +oo + -oo == NaN
123 res.s = a.s ^ b.s;
124 res.type = ppc_fpr_NaN;
125 break;
126 }
127 // fall-thru
128 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
129 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
130 res.s = a.s;
131 res.type = a.type;
132 break;
133 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
134 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
135 res.s = b.s;
136 res.type = b.type;
137 break;
138 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
139 // round bla
140 res.type = ppc_fpr_zero;
141 res.s = a.s && b.s;
142 break;
143 }
144 }
145
146 inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
147 {
148 if (exp >= 64) {
149 q.m1 = q.m0;
150 q.m0 = 0;
151 exp -= 64;
152 }
153 uint64 t = q.m0 & ((1ULL<<exp)-1);
154 q.m0 >>= exp;
155 q.m1 >>= exp;
156 q.m1 |= t<<(64-exp);
157 }
158
159 inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
160 {
161 if (exp >= 64) {
162 q.m0 = q.m1;
163 q.m1 = 0;
164 exp -= 64;
165 }
166 uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
167 q.m0 <<= exp;
168 q.m1 <<= exp;
169 q.m0 |= t;
170 }
171
172 inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
173 {
174 res.m1 = a.m1+b.m1;
175 if (res.m1 < a.m1) {
176 res.m0 = a.m0+b.m0+1;
177 } else {
178 res.m0 = a.m0+b.m0;
179 }
180 }
181
182 inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
183 {
184 res.m1 = a.m1-b.m1;
185 if (a.m1 < b.m1) {
186 res.m0 = a.m0-b.m0-1;
187 } else {
188 res.m0 = a.m0-b.m0;
189 }
190 }
191
192 // res has 107 significant bits. a, b have 106 significant bits each.
193 inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b)
194 {
195 // treat as 107 bit mantissa
196 if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1);
197 if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1);
198 switch (PPC_FPR_TYPE2(a.type, b.type)) {
199 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
200 int diff = a.e - b.e;
201 if (diff < 0) {
202 diff = -diff;
203 if (diff <= 107) {
204 // FIXME: may set x_prime
205 ppc_fpu_quadro_mshr(a, diff);
206 } else if (a.m0 || a.m1) {
207 a.m0 = 0;
208 a.m1 = 1;
209 } else {
210 a.m0 = 0;
211 a.m1 = 0;
212 }
213 res.e = b.e;
214 } else {
215 if (diff <= 107) {
216 // FIXME: may set x_prime
217 ppc_fpu_quadro_mshr(b, diff);
218 } else if (b.m0 || b.m1) {
219 b.m0 = 0;
220 b.m1 = 1;
221 } else {
222 b.m0 = 0;
223 b.m1 = 0;
224 }
225 res.e = a.e;
226 }
227 res.type = ppc_fpr_norm;
228 if (a.s == b.s) {
229 res.s = a.s;
230 ppc_fpu_add_quadro_m(res, a, b);
231 int X_prime = res.m1 & 1;
232 if (res.m0 & (1ULL<<(107-64))) {
233 ppc_fpu_quadro_mshr(res, 1);
234 res.e++;
235 }
236 // res = [107]
237 res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime;
238 } else {
239 res.s = a.s;
240 int cmp;
241 if (a.m0 < b.m0) {
242 cmp = -1;
243 } else if (a.m0 > b.m0) {
244 cmp = +1;
245 } else {
246 if (a.m1 < b.m1) {
247 cmp = -1;
248 } else if (a.m1 > b.m1) {
249 cmp = +1;
250 } else {
251 cmp = 0;
252 }
253 }
254 if (!cmp) {
255 if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
256 res.s |= b.s;
257 } else {
258 res.s &= b.s;
259 }
260 res.type = ppc_fpr_zero;
261 } else {
262 if (cmp < 0) {
263 ppc_fpu_sub_quadro_m(res, b, a);
264 res.s = b.s;
265 } else {
266 ppc_fpu_sub_quadro_m(res, a, b);
267 }
268 diff = ppc_fpu_normalize_quadro(res) - (128-107);
269 int X_prime = res.m1 & 1;
270 res.m1 &= 0xfffffffffffffffeULL;
271 ppc_fpu_quadro_mshl(res, diff);
272 res.e -= diff;
273 res.m1 |= X_prime;
274 }
275 // res = [107]
276 }
277 break;
278 }
279 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
280 res.s = a.s;
281 res.type = ppc_fpr_NaN;
282 break;
283 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
284 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
285 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
286 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
287 res.e = a.e;
288 res.s = a.s;
289 res.m0 = a.m0;
290 res.m1 = a.m1;
291 res.type = a.type;
292 break;
293 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
294 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
295 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
296 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
297 res.e = b.e;
298 res.s = b.s;
299 res.m0 = b.m0;
300 res.m1 = b.m1;
301 res.type = b.type;
302 break;
303 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
304 if (a.s != b.s) {
305 // +oo + -oo == NaN
306 res.s = a.s ^ b.s;
307 res.type = ppc_fpr_NaN;
308 break;
309 }
310 // fall-thru
311 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
312 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
313 res.s = a.s;
314 res.type = a.type;
315 break;
316 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
317 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
318 res.s = b.s;
319 res.type = b.type;
320 break;
321 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
322 // round bla
323 res.type = ppc_fpr_zero;
324 res.s = a.s && b.s;
325 break;
326 }
327 }
328
329 inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
330 {
331 carry = (a+b < a) ? 1 : 0;
332 a += b;
333 }
334
335 // 'res' has 56 significant bits on return, a + b have 56 significant bits each
336 inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b)
337 {
338 res.s = a.s ^ b.s;
339 switch (PPC_FPR_TYPE2(a.type, b.type)) {
340 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
341 res.type = ppc_fpr_norm;
342 res.e = a.e + b.e;
343 // printf("new exp: %d\n", res.e);
344 // ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m);
345 uint64 fH, fM1, fM2, fL;
346 fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
347 fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
348 fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
349 fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
350 // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
351
352 // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
353 uint64 rL, rH;
354 rL = fL; // rL = rH = [63,64]
355 rH = fH; // rH = fH = [47,48]
356 uint64 split;
357 split = fM1 + fM2;
358 uint64 carry;
359 ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
360 rH += carry; // rH = [0 .. 2^48]
361 rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
362
363 // res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55]
364 // [---------------------------------------------------------]
365 // bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ]
366 // [---------------------------------------------------------]
367 // [15 bits zero | 49 bits rH | 8 most sign.bits rL ]
368 res.m = rH << 9;
369 res.m |= rL >> (64-9);
370 // res.m = [58]
371
372 // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
373 if (res.m & (1ULL << 57)) {
374 res.m >>= 2;
375 res.e += 2;
376 } else if (res.m & (1ULL << 56)) {
377 res.m >>= 1;
378 res.e++;
379 }
380 // res.m = [56]
381 break;
382 }
383 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
384 res.type = a.type;
385 res.e = a.e;
386 break;
387 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
388 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
389 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
390 res.s = a.s;
391 // fall-thru
392 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
393 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
394 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
395 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
396 res.type = a.type;
397 break;
398 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
399 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
400 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
401 res.s = b.s;
402 // fall-thru
403 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
404 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
405 res.type = b.type;
406 break;
407 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
408 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
409 res.type = ppc_fpr_NaN;
410 break;
411 }
412 }
413
414 // 'res' has 'prec' significant bits on return, a + b have 56 significant bits each
415 // for 111 >= prec >= 64
416 inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec)
417 {
418 res.s = a.s ^ b.s;
419 switch (PPC_FPR_TYPE2(a.type, b.type)) {
420 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
421 res.type = ppc_fpr_norm;
422 res.e = a.e + b.e;
423 // printf("new exp: %d\n", res.e);
424 // ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m);
425 uint64 fH, fM1, fM2, fL;
426 fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
427 fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
428 fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
429 fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
430 // ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL);
431
432 // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
433 uint64 rL, rH;
434 rL = fL; // rL = rH = [63,64]
435 rH = fH; // rH = fH = [47,48]
436 uint64 split;
437 split = fM1 + fM2;
438 uint64 carry;
439 ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
440 rH += carry; // rH = [0 .. 2^48]
441 rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
442
443 // res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0]
444 // [-----------------------------------------------------------]
445 // log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ]
446 // [-----------------------------------------------------------]
447 // [ 15 bits zero | 49 bits rH | 64 bits rL ]
448 res.m0 = rH;
449 res.m1 = rL;
450 // res.m0|res.m1 = [111,112,113]
451
452 // ht_printf("res = %016qx%016qx\n", res.m0, res.m1);
453 if (res.m0 & (1ULL << 48)) {
454 ppc_fpu_quadro_mshr(res, 2+(111-prec));
455 res.e += 2;
456 } else if (res.m0 & (1ULL << 47)) {
457 ppc_fpu_quadro_mshr(res, 1+(111-prec));
458 res.e += 1;
459 } else {
460 ppc_fpu_quadro_mshr(res, 111-prec);
461 }
462 // res.m0|res.m1 = [prec]
463 break;
464 }
465 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
466 res.type = a.type;
467 res.e = a.e;
468 break;
469 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
470 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
471 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
472 res.s = a.s;
473 // fall-thru
474 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
475 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
476 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
477 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
478 res.type = a.type;
479 break;
480 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
481 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
482 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
483 res.s = b.s;
484 // fall-thru
485 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
486 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
487 res.type = b.type;
488 break;
489 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
490 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
491 res.type = ppc_fpr_NaN;
492 break;
493 }
494 }
495
496 // calculate one of these:
497 // + m1 * m2 + s
498 // + m1 * m2 - s
499 // - m1 * m2 + s
500 // - m1 * m2 - s
501 // using a 106 bit accumulator
502 //
503 // .752
504 //
505 inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2,
506 ppc_double &s)
507 {
508 ppc_quadro p;
509 /* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e,
510 ppc_fpu_get_fpr_type(m1.type));
511 ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e,
512 ppc_fpu_get_fpr_type(m2.type));*/
513 // create product with 106 significant bits
514 ppc_fpu_mul_quadro(p, m1, m2, 106);
515 /* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e,
516 ppc_fpu_get_fpr_type(p.type));*/
517 // convert s into ppc_quadro
518 /* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e,
519 ppc_fpu_get_fpr_type(s.type));*/
520 ppc_quadro q;
521 q.e = s.e;
522 q.s = s.s;
523 q.type = s.type;
524 q.m0 = 0;
525 q.m1 = s.m;
526 // .. with 106 significant bits
527 ppc_fpu_quadro_mshl(q, 106-56);
528 /* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e,
529 ppc_fpu_get_fpr_type(q.type));*/
530 // now we must add p, q.
531 ppc_quadro x;
532 ppc_fpu_add_quadro(x, p, q);
533 // x = [107]
534 /* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e,
535 ppc_fpu_get_fpr_type(x.type));*/
536 res.type = x.type;
537 res.s = x.s;
538 res.e = x.e;
539 if (x.type == ppc_fpr_norm) {
540 res.m = x.m0 << 13; // 43 bits from m0
541 res.m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1
542 res.m |= x.m1 & 1; // X' bit from m1
543 }
544 /* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e,
545 ppc_fpu_get_fpr_type(res.type));*/
546 }
547
548 inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
549 {
550 res.s = a.s ^ b.s;
551 switch (PPC_FPR_TYPE2(a.type, b.type)) {
552 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
553 res.type = ppc_fpr_norm;
554 res.e = a.e - b.e;
555 res.m = 0;
556 uint64 am = a.m, bm = b.m;
557 uint i = 0;
558 // printf("DIV:\nam=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
559 while (am && (i<56)) {
560 res.m <<= 1;
561 if (am >= bm) {
562 res.m |= 1;
563 am -= bm;
564 }
565 am <<= 1;
566 // printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
567 i++;
568 }
569 res.m <<= 57-i;
570 if (res.m & (1ULL << 56)) {
571 res.m >>= 1;
572 } else {
573 res.e--;
574 }
575 // printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
576 break;
577 }
578 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
579 res.e = a.e;
580 // fall-thru
581 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
582 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
583 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
584 res.s = a.s;
585 // fall-thru
586 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
587 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
588 res.type = a.type;
589 break;
590 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
591 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
592 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
593 res.s = b.s;
594 res.type = b.type;
595 break;
596 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
597 res.type = ppc_fpr_zero;
598 break;
599 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
600 res.type = ppc_fpr_Inf;
601 break;
602 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
603 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
604 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
605 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
606 res.type = ppc_fpr_NaN;
607 break;
608 }
609 }
610
611 inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
612 {
613 switch (B.type) {
614 case ppc_fpr_norm:
615 if (B.s) {
616 D.type = ppc_fpr_NaN;
617 gCPU.fpscr |= FPSCR_VXSQRT;
618 break;
619 }
620 // D := 1/2(D_old + B/D_old)
621 D = B;
622 D.e /= 2;
623 for (int i=0; i<6; i++) {
624 ppc_double D_old = D;
625 ppc_double B_div_D_old;
626 ppc_fpu_div(B_div_D_old, B, D_old);
627 ppc_fpu_add(D, D_old, B_div_D_old);
628 D.e--;
629
630 /* uint64 e;
631 ppc_double E = D;
632 ppc_fpu_pack_double(E, e);
633 printf("%.20f\n", *(double *)&e);*/
634 }
635 break;
636 case ppc_fpr_zero:
637 D.type = ppc_fpr_zero;
638 D.s = B.s;
639 break;
640 case ppc_fpr_Inf:
641 if (B.s) {
642 D.type = ppc_fpr_NaN;
643 gCPU.fpscr |= FPSCR_VXSQRT;
644 } else {
645 D.type = ppc_fpr_Inf;
646 D.s = 0;
647 }
648 break;
649 case ppc_fpr_NaN:
650 D.type = ppc_fpr_NaN;
651 break;
652 }
653 }
654
655 void ppc_fpu_test()
656 {
657 double bb = 1.0;
658 uint64 b = *(uint64 *)&bb;
659 ppc_double B;
660 ppc_double D;
661 ppc_fpu_unpack_double(B, b);
662 ht_printf("%d\n", B.e);
663 ppc_fpu_sqrt(D, B);
664 uint64 d;
665 gCPU.fpscr |= ppc_fpu_pack_double(D, d);
666 printf("%f\n", *(double *)&d);
667 /* ppc_double A, B, C, D, E;
668 ppc_fpu_unpack_double(A, 0xc00fafcd6c40e500ULL);
669 ppc_fpu_unpack_double(B, 0xc00fafcd6c40e4beULL);
670 B.s ^= 1;
671 ppc_fpu_add(E, A, B);
672 uint64 e;
673 ppc_fpu_pack_double(E, e);
674 ht_printf("%qx\n", e);
675 ppc_fpu_add(D, E, B);*/
676
677 /* ppc_double A, B, C;
678 double a, b, c;
679 A.type = B.type = ppc_fpr_norm;
680 A.s = 1;
681 A.e = 0;
682 A.m = 0;
683 A.m = ((1ULL<<56)-1)-((1ULL<<10)-1);
684 ht_printf("%qb\n", A.m);
685 B.s = 1;
686 B.e = 0;
687 B.m = 0;
688 B.m = ((1ULL<<56)-1)-((1ULL<<50)-1);
689 a = ppc_fpu_get_double(A);
690 b = ppc_fpu_get_double(B);
691 printf("%f + %f = \n", a, b);
692 ppc_fpu_add(C, A, B);
693 uint64 d;
694 uint32 s;
695 ppc_fpu_pack_double_as_single(C, d);
696 ht_printf("%064qb\n", d);
697 ppc_fpu_unpack_double(C, d);
698 ppc_fpu_pack_single(C, s);
699 ht_printf("single: %032b\n", s);
700 ppc_single Cs;
701 ppc_fpu_unpack_single(Cs, s);
702 ppc_fpu_single_to_double(Cs, C);
703 // ht_printf("%d\n", ppc_fpu_double_to_int(C));
704 c = ppc_fpu_get_double(C);
705 printf("%f\n", c);*/
706 }
707
708 /*
709 * a and b must not be NaNs
710 */
711 inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b)
712 {
713 if (a.type == ppc_fpr_zero) {
714 if (b.type == ppc_fpr_zero) return 2;
715 return (b.s) ? 4: 8;
716 }
717 if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4;
718 if (a.s != b.s) return (a.s) ? 8: 4;
719 if (a.e > b.e) return (a.s) ? 8: 4;
720 if (a.e < b.e) return (a.s) ? 4: 8;
721 if (a.m > b.m) return (a.s) ? 8: 4;
722 if (a.m < b.m) return (a.s) ? 4: 8;
723 return 2;
724 }
725
726 double ppc_fpu_get_double(uint64 d)
727 {
728 ppc_double dd;
729 ppc_fpu_unpack_double(dd, d);
730 return ppc_fpu_get_double(dd);
731 }
732
733 double ppc_fpu_get_double(ppc_double &d)
734 {
735 if (d.type == ppc_fpr_norm) {
736 double r = d.m;
737 for (int i=0; i<55; i++) {
738 r = r / 2.0;
739 }
740 if (d.e < 0) {
741 for (int i=0; i>d.e; i--) {
742 r = r / 2.0;
743 }
744 } else if (d.e > 0) {
745 for (int i=0; i<d.e; i++) {
746 r = r * 2.0;
747 }
748 }
749 if (d.s) r = -r;
750 return r;
751 } else {
752 return 0.0;
753 }
754 }
755
756 /***********************************************************************************
757 *
758 */
759
/*
 *	Exchanges operand A with operand B: swaps frA/frB, a/b and op/rop.
 *	Expects all six names to be visible at the point of use.
 *
 *	Deliberately has no trailing semicolon after "while(0)", so that
 *	"SWAP;" is exactly one statement and is safe in an unbraced if/else.
 */
#define SWAP do { \
	int tmp = frA; frA = frB; frB = tmp; \
	tmp = a; a = b; b = tmp; \
	X86FloatArithOp tmpop = op; op = rop; rop = tmpop; \
} while(0)
765
766
767 static void ppc_opc_gen_binary_floatop(X86FloatArithOp op, X86FloatArithOp rop, int frD, int frA, int frB)
768 {
769 jitcFloatRegisterClobberAll();
770 // jitcSetFPUPrecision(53);
771
772 // ht_printf("binfloatop: %x: %d: %d, %d, %d\n", gJITC.pc, op, frD, frA, frB);
773 // op == st(i)/st(0)
774 // rop == st(0)/st(i)
775
776 // op == st(0)/mem
777 // rop == mem/st(0)
778
779 // frD := frA (op) frB = frB (rop) frA
780
781 // make sure client float register aren't mapped to integer registers
782 jitcClobberClientRegisterForFloat(frA);
783 jitcClobberClientRegisterForFloat(frB);
784 jitcInvalidateClientRegisterForFloat(frD);
785
786 JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
787 JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB);
788 // ht_printf("%d -> %d\n", frA, a);
789 // ht_printf("%d -> %d\n", frB, b);
790 if (a == JITC_FLOAT_REG_NONE && b != JITC_FLOAT_REG_NONE) {
791 // b is mapped but not a, swap them
792 SWAP;
793 }
794 if (a != JITC_FLOAT_REG_NONE) {
795 // a is mapped
796 if (frB == frD && frA != frD) {
797 // b = st(a) (op) b
798 // ht_printf("case a\n");
799 b = jitcGetClientFloatRegister(frB, a);
800 if (jitcFloatRegisterIsTOP(b)) {
801 asmFArith_ST0(op, jitcFloatRegisterToNative(a));
802 } else {
803 jitcFloatRegisterXCHGToFront(a);
804 asmFArith_STi(rop, jitcFloatRegisterToNative(b));
805 }
806 jitcFloatRegisterDirty(b);
807 } else if (frA == frD) {
808 // st(a) = st(a) (op) b
809 // ht_printf("case b\n");
810 b = jitcGetClientFloatRegister(frB, a);
811 if (jitcFloatRegisterIsTOP(b)) {
812 asmFArith_STi(op, jitcFloatRegisterToNative(a));
813 } else {
814 jitcFloatRegisterXCHGToFront(a);
815 asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
816 }
817 jitcFloatRegisterDirty(a);
818 } else {
819 // ht_printf("case c\n");
820 // frA != frD != frB (and frA is mapped, frD isn't mapped)
821 a = jitcFloatRegisterDup(a, b);
822 // ht_printf("%d\n", b);
823 // now a is TOP
824 if (b != JITC_FLOAT_REG_NONE) {
825 asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
826 } else {
827 modrm_o modrm;
828 asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
829 }
830 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
831 if (d == JITC_FLOAT_REG_NONE) {
832 jitcMapClientFloatRegisterDirty(frD, a);
833 } else {
834 jitcFloatRegisterStoreAndPopTOP(d);
835 jitcFloatRegisterDirty(d);
836 }
837 }
838 } else {
839 // ht_printf("case d\n");
840 // neither a nor b is mapped
841 if (frB == frD && frA != frD) {
842 // frB = frA (op) frB, none of them is mapped
843 b = jitcGetClientFloatRegister(frB);
844 jitcFloatRegisterDirty(b);
845 modrm_o modrm;
846 asmFArith(rop, x86_mem2(modrm, &gCPU.fpr[frA]));
847 return;
848 }
849 if (frA == frD) {
850 // frA = frA (op) frB, none of them is mapped
851 a = jitcGetClientFloatRegister(frA);
852 jitcFloatRegisterDirty(a);
853 modrm_o modrm;
854 asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
855 } else {
856 // frA != frD != frB (and frA, frB aren't mapped)
857 a = jitcGetClientFloatRegisterUnmapped(frA);
858 modrm_o modrm;
859 asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
860 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
861 if (d == JITC_FLOAT_REG_NONE) {
862 jitcMapClientFloatRegisterDirty(frD, a);
863 } else {
864 jitcFloatRegisterStoreAndPopTOP(d);
865 jitcFloatRegisterDirty(d);
866 }
867 }
868 }
869 }
870
871 static inline void ppc_opc_gen_unary_floatop(X86FloatOp op, int frD, int frA)
872 {
873 jitcClobberClientRegisterForFloat(frA);
874 jitcInvalidateClientRegisterForFloat(frD);
875 if (frD == frA) {
876 JitcFloatReg a = jitcGetClientFloatRegister(frA);
877 jitcFloatRegisterDirty(a);
878 jitcFloatRegisterXCHGToFront(a);
879 } else {
880 JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
881 if (a == JITC_FLOAT_REG_NONE) {
882 a = jitcGetClientFloatRegisterUnmapped(frA);
883 } else {
884 a = jitcFloatRegisterDup(a);
885 }
886 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
887 if (d == JITC_FLOAT_REG_NONE) {
888 jitcMapClientFloatRegisterDirty(frD, a);
889 } else {
890 asmFSimple(op);
891 jitcFloatRegisterStoreAndPopTOP(d);
892 jitcFloatRegisterDirty(d);
893 return;
894 }
895 }
896 asmFSimple(op);
897 }
898
899
900 /*
901 fmadd FADD false
902 fmsub FSUB false
903 fnmadd FADD true
904 fnmsub FSUBR false
905 */
906
907 static void ppc_opc_gen_ternary_floatop(X86FloatArithOp op, X86FloatArithOp rop, bool chs, int frD, int frA, int frC, int frB)
908 {
909 jitcFloatRegisterClobberAll();
910 // jitcSetFPUPrecision(64);
911 jitcClobberClientRegisterForFloat(frA);
912 jitcClobberClientRegisterForFloat(frC);
913 jitcClobberClientRegisterForFloat(frB);
914 jitcInvalidateClientRegisterForFloat(frD);
915
916 JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
917 if (a != JITC_FLOAT_REG_NONE) {
918 ht_printf("askf lsa flsd\n");
919 a = jitcFloatRegisterDup(a, jitcGetClientFloatRegisterMapping(frC));
920 } else {
921 a = jitcGetClientFloatRegisterUnmapped(frA, jitcGetClientFloatRegisterMapping(frC), jitcGetClientFloatRegisterMapping(frB));
922 }
923 // a is TOP now
924 JitcFloatReg c = jitcGetClientFloatRegisterMapping(frC);
925 if (c != JITC_FLOAT_REG_NONE) {
926 asmFArith_ST0(X86_FMUL, jitcFloatRegisterToNative(c));
927 } else {
928 modrm_o modrm;
929 asmFArith(X86_FMUL, x86_mem2(modrm, &gCPU.fpr[frC]));
930 }
931 JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB);
932 if (b != JITC_FLOAT_REG_NONE) {
933 asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
934 } else {
935 modrm_o modrm;
936 asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
937 }
938 if (chs) {
939 asmFSimple(FCHS);
940 }
941 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
942 if (d == JITC_FLOAT_REG_NONE) {
943 jitcMapClientFloatRegisterDirty(frD, a);
944 } else {
945 jitcFloatRegisterStoreAndPopTOP(d);
946 jitcFloatRegisterDirty(d);
947 }
948 }
949
950 #define JITC
951
952 static void FASTCALL ppc_opc_gen_update_cr1_output_err(const char *err)
953 {
954 PPC_FPU_ERR("%s\n", err);
955 }
956
957 static void ppc_opc_gen_update_cr1(const char *err)
958 {
959 asmALU(X86_MOV, EAX, (uint32)err);
960 asmCALL((NativeAddress)ppc_opc_gen_update_cr1_output_err);
961 }
962
963 /*
964 * fabsx Floating Absolute Value
965 * .484
966 */
967 void ppc_opc_fabsx()
968 {
969 int frD, frA, frB;
970 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
971 PPC_OPC_ASSERT(frA==0);
972 gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT;
973 if (gCPU.current_opc & PPC_OPC_Rc) {
974 // update cr1 flags
975 PPC_FPU_ERR("fabs.\n");
976 }
977 }
978 JITCFlow ppc_opc_gen_fabsx()
979 {
980 int frD, frA, frB;
981 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
982 if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
983 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
984 if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
985 jitcClobberCarryAndFlags();
986 if (frD != frB) {
987 NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
988 NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
989 NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
990 NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
991 asmALU(X86_MOV, dh, bh);
992 asmALU(X86_MOV, dl, bl);
993 asmALU(X86_AND, dh, 0x7fffffff);
994 } else {
995 NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
996 asmALU(X86_AND, b, 0x7fffffff);
997 }
998 } else {
999 ppc_opc_gen_unary_floatop(FABS, frD, frB);
1000 }
1001 if (gJITC.current_opc & PPC_OPC_Rc) {
1002 // update cr1 flags
1003 ppc_opc_gen_update_cr1("fabs.\n");
1004 }
1005 return flowContinue;
1006 }
1007 /*
1008 * faddx Floating Add (Double-Precision)
1009 * .485
1010 */
1011 void ppc_opc_faddx()
1012 {
1013 int frD, frA, frB, frC;
1014 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1015 PPC_OPC_ASSERT(frC==0);
1016 ppc_double A, B, D;
1017 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1018 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1019 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1020 gCPU.fpscr |= FPSCR_VXISI;
1021 }
1022 ppc_fpu_add(D, A, B);
1023 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1024 if (gCPU.current_opc & PPC_OPC_Rc) {
1025 // update cr1 flags
1026 PPC_FPU_ERR("fadd.\n");
1027 }
1028 }
1029 JITCFlow ppc_opc_gen_faddx()
1030 {
1031 #ifdef JITC
1032 int frD, frA, frB, frC;
1033 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1034 PPC_OPC_ASSERT(frC==0);
1035 ppc_opc_gen_binary_floatop(X86_FADD, X86_FADD, frD, frA, frB);
1036 if (gJITC.current_opc & PPC_OPC_Rc) {
1037 // update cr1 flags
1038 ppc_opc_gen_update_cr1("fadd.\n");
1039 }
1040 return flowContinue;
1041 #else
1042 ppc_opc_gen_interpret(ppc_opc_faddx);
1043 return flowEndBlock;
1044 #endif
1045 }
1046 /*
1047 * faddsx Floating Add Single
1048 * .486
1049 */
1050 void ppc_opc_faddsx()
1051 {
1052 int frD, frA, frB, frC;
1053 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1054 PPC_OPC_ASSERT(frC==0);
1055 ppc_double A, B, D;
1056 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1057 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1058 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1059 gCPU.fpscr |= FPSCR_VXISI;
1060 }
1061 ppc_fpu_add(D, A, B);
1062 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1063 if (gCPU.current_opc & PPC_OPC_Rc) {
1064 // update cr1 flags
1065 PPC_FPU_ERR("fadds.\n");
1066 }
1067 }
1068 JITCFlow ppc_opc_gen_faddsx()
1069 {
1070 ppc_opc_gen_interpret(ppc_opc_faddsx);
1071 return flowEndBlock;
1072 }
1073 /*
1074 * fcmpo Floating Compare Ordered
1075 * .488
1076 */
1077 static uint32 ppc_fpu_cmp_and_mask[8] = {
1078 0xfffffff0,
1079 0xffffff0f,
1080 0xfffff0ff,
1081 0xffff0fff,
1082 0xfff0ffff,
1083 0xff0fffff,
1084 0xf0ffffff,
1085 0x0fffffff,
1086 };
1087 void ppc_opc_fcmpo()
1088 {
1089 int crfD, frA, frB;
1090 PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
1091 crfD >>= 2;
1092 ppc_double A, B;
1093 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1094 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1095 uint32 cmp;
1096 if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
1097 gCPU.fpscr |= FPSCR_VXSNAN;
1098 /*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC;
1099 cmp = 1;
1100 } else {
1101 cmp = ppc_fpu_compare(A, B);
1102 }
1103 crfD = 7-crfD;
1104 gCPU.fpscr &= ~0x1f000;
1105 gCPU.fpscr |= (cmp << 12);
1106 gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
1107 gCPU.cr |= (cmp << (crfD * 4));
1108 }
1109 JITCFlow ppc_opc_gen_fcmpo()
1110 {
1111 ppc_opc_gen_interpret(ppc_opc_fcmpo);
1112 return flowEndBlock;
1113 }
1114 /*
1115 * fcmpu Floating Compare Unordered
1116 * .489
1117 */
1118 void ppc_opc_fcmpu()
1119 {
1120 int crfD, frA, frB;
1121 PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
1122 crfD >>= 2;
1123 ppc_double A, B;
1124 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1125 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1126 uint32 cmp;
1127 if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
1128 gCPU.fpscr |= FPSCR_VXSNAN;
1129 cmp = 1;
1130 } else {
1131 cmp = ppc_fpu_compare(A, B);
1132 }
1133 crfD = 7-crfD;
1134 gCPU.fpscr &= ~0x1f000;
1135 gCPU.fpscr |= (cmp << 12);
1136 gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
1137 gCPU.cr |= (cmp << (crfD * 4));
1138 }
// JIT hook for fcmpu. No native code is generated here: the interpreter routine
// ppc_opc_fcmpu is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1139 JITCFlow ppc_opc_gen_fcmpu()
1140 {
1141 ppc_opc_gen_interpret(ppc_opc_fcmpu);
1142 return flowEndBlock;
1143 }
1144 /*
1145 * fctiwx Floating Convert to Integer Word
1146 * .492
1147 */
1148 void ppc_opc_fctiwx()
1149 {
1150 int frD, frA, frB;
1151 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1152 PPC_OPC_ASSERT(frA==0);
1153 ppc_double B;
1154 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1155 gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
1156 if (gCPU.current_opc & PPC_OPC_Rc) {
1157 // update cr1 flags
1158 PPC_FPU_ERR("fctiw.\n");
1159 }
1160 }
// JIT code generator for fctiw: emits an x87 FISTP that writes frB, converted to an
// integer, straight into the memory slot of gCPU.fpr[frD], then updates the JIT's
// bookkeeping for the x87 register stack.
// NOTE(review): rounding presumably follows whatever x87 control word is loaded at
// run time -- confirm.
1161 JITCFlow ppc_opc_gen_fctiwx()
1162 {
1163 int frD, frA, frB;
1164 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1165 PPC_OPC_ASSERT(frA==0);
// Helpers defined elsewhere; presumably they sync frB's and drop frD's
// integer-register copies before the float path is used -- confirm.
1166 jitcClobberClientRegisterForFloat(frB);
1167 jitcInvalidateClientRegisterForFloat(frD);
1168
1169 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
// frD is about to be overwritten in memory. If it currently owns a float register
// of its own, move that register to the front, free it and forget the mapping.
1170 if (frB != frD && d != JITC_FLOAT_REG_NONE) {
1171 jitcFloatRegisterXCHGToFront(d);
1172 asmFFREEP(Float_ST0);
1173 gJITC.nativeFloatRegState[d] = rsUnused;
1174 gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
1175 gJITC.nativeFloatTOP--;
1176 }
1177
1178 modrm_o modrm;
1179 JitcFloatReg b = jitcGetClientFloatRegisterUnmapped(frB);
// Store-and-pop into fpr[frD]; the register that held frB is gone afterwards,
// so mark it unused, unmap frB and lower the tracked stack depth.
1180 asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
1181 gJITC.nativeFloatRegState[b] = rsUnused;
1182 gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
1183 gJITC.nativeFloatTOP--;
1184
1185 if (gJITC.current_opc & PPC_OPC_Rc) {
1186 // update cr1 flags
1187 ppc_opc_gen_update_cr1("fctiw.\n");
1188 }
1189 return flowContinue;
1190 }
1191 /*
1192 * fctiwzx Floating Convert to Integer Word with Round toward Zero
1193 * .493
1194 */
1195 void ppc_opc_fctiwzx()
1196 {
1197 int frD, frA, frB;
1198 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1199 PPC_OPC_ASSERT(frA==0);
1200 uint32 oldfpscr = gCPU.fpscr;
1201 gCPU.fpscr &= ~3;
1202 gCPU.fpscr |= 1;
1203 ppc_double B;
1204 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1205 gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
1206 gCPU.fpscr = oldfpscr;
1207 if (gCPU.current_opc & PPC_OPC_Rc) {
1208 // update cr1 flags
1209 PPC_FPU_ERR("fctiwz.\n");
1210 }
1211 }
// JIT code generator for fctiwz: like the fctiw generator, but the store must
// truncate. With SSE3 it emits FISTTP; without it, it brackets a plain FISTP with
// two FLDCW instructions (switch to the local control word, then back to gCPU.x87cw).
1212 JITCFlow ppc_opc_gen_fctiwzx()
1213 {
1214 int frD, frA, frB;
1215 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1216 PPC_OPC_ASSERT(frA==0);
1217
// x87 control word loaded around the store on non-SSE3 hosts; bits 10-11
// (rounding control) are both set, which selects truncation.
1218 static uint16 cw = 0xfff;
1219
1220 modrm_o modrm;
1221 if (!gJITC.hostCPUCaps.sse3) {
1222 asmFLDCW(x86_mem2(modrm, &cw));
1223 }
1224
// Helpers defined elsewhere; presumably they sync frB's and drop frD's
// integer-register copies before the float path is used -- confirm.
1225 jitcClobberClientRegisterForFloat(frB);
1226 jitcInvalidateClientRegisterForFloat(frD);
1227
1228 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
// frD is about to be overwritten in memory. If it currently owns a float register
// of its own, move that register to the front, free it and forget the mapping.
1229 if (frB != frD && d != JITC_FLOAT_REG_NONE) {
1230 jitcFloatRegisterXCHGToFront(d);
1231 asmFFREEP(Float_ST0);
1232 gJITC.nativeFloatRegState[d] = rsUnused;
1233 gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
1234 gJITC.nativeFloatTOP--;
1235 }
1236
1237 JitcFloatReg b = jitcGetClientFloatRegisterUnmapped(frB);
1238 if (gJITC.hostCPUCaps.sse3) {
1239 asmFISTTP(x86_mem2(modrm, &gCPU.fpr[frD]));
1240 } else {
1241 asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
1242 }
// The store popped the register that held frB: mark it unused, unmap frB and
// lower the tracked stack depth.
1243 gJITC.nativeFloatRegState[b] = rsUnused;
1244 gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
1245 gJITC.nativeFloatTOP--;
1246
// Put the emulator's regular control word back.
1247 if (!gJITC.hostCPUCaps.sse3) {
1248 asmFLDCW(x86_mem2(modrm, &gCPU.x87cw));
1249 }
1250
1251 if (gJITC.current_opc & PPC_OPC_Rc) {
1252 // update cr1 flags
1253 ppc_opc_gen_update_cr1("fctiwz.\n");
1254 }
1255 return flowContinue;
1256 }
1257 /*
1258 * fdivx Floating Divide (Double-Precision)
1259 * .494
1260 */
1261 void ppc_opc_fdivx()
1262 {
1263 int frD, frA, frB, frC;
1264 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1265 PPC_OPC_ASSERT(frC==0);
1266 ppc_double A, B, D;
1267 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1268 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1269 if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
1270 gCPU.fpscr |= FPSCR_VXZDZ;
1271 }
1272 if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1273 gCPU.fpscr |= FPSCR_VXIDI;
1274 }
1275 if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
1276 // FIXME::
1277 gCPU.fpscr |= FPSCR_VXIDI;
1278 }
1279 ppc_fpu_div(D, A, B);
1280 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1281 if (gCPU.current_opc & PPC_OPC_Rc) {
1282 // update cr1 flags
1283 PPC_FPU_ERR("fdiv.\n");
1284 }
1285 }
1286 JITCFlow ppc_opc_gen_fdivx()
1287 {
1288 #ifndef JITC
1289 ppc_opc_gen_interpret(ppc_opc_fdivx);
1290 return flowEndBlock;
1291 #else
1292 int frD, frA, frB, frC;
1293 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1294 PPC_OPC_ASSERT(frC==0);
1295 ppc_opc_gen_binary_floatop(X86_FDIV, X86_FDIVR, frD, frA, frB);
1296 if (gJITC.current_opc & PPC_OPC_Rc) {
1297 // update cr1 flags
1298 ppc_opc_gen_update_cr1("fdiv.\n");
1299 }
1300 return flowContinue;
1301 #endif
1302 }
1303 /*
1304 * fdivsx Floating Divide Single
1305 * .495
1306 */
1307 void ppc_opc_fdivsx()
1308 {
1309 int frD, frA, frB, frC;
1310 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1311 PPC_OPC_ASSERT(frC==0);
1312 ppc_double A, B, D;
1313 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1314 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1315 if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
1316 gCPU.fpscr |= FPSCR_VXZDZ;
1317 }
1318 if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1319 gCPU.fpscr |= FPSCR_VXIDI;
1320 }
1321 if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
1322 // FIXME::
1323 gCPU.fpscr |= FPSCR_VXIDI;
1324 }
1325 ppc_fpu_div(D, A, B);
1326 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1327 if (gCPU.current_opc & PPC_OPC_Rc) {
1328 // update cr1 flags
1329 PPC_FPU_ERR("fdivs.\n");
1330 }
1331 }
// JIT hook for fdivs. No native code is generated here: the interpreter routine
// ppc_opc_fdivsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1332 JITCFlow ppc_opc_gen_fdivsx()
1333 {
1334 ppc_opc_gen_interpret(ppc_opc_fdivsx);
1335 return flowEndBlock;
1336 }
1337 /*
1338 * fmaddx Floating Multiply-Add (Double-Precision)
1339 * .496
1340 */
1341 void ppc_opc_fmaddx()
1342 {
1343 int frD, frA, frB, frC;
1344 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1345 ppc_double A, B, C, D;
1346 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1347 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1348 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1349 ppc_fpu_mul_add(D, A, C, B);
1350 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1351 if (gCPU.current_opc & PPC_OPC_Rc) {
1352 // update cr1 flags
1353 PPC_FPU_ERR("fmadd.\n");
1354 }
1355 }
1356 JITCFlow ppc_opc_gen_fmaddx()
1357 {
1358 int frD, frA, frB, frC;
1359 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1360 ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, false, frD, frA, frC, frB);
1361 if (gJITC.current_opc & PPC_OPC_Rc) {
1362 // update cr1 flags
1363 ppc_opc_gen_update_cr1("fmadd.\n");
1364 }
1365 return flowContinue;
1366 }
1367 /*
1368 * fmaddx Floating Multiply-Add Single
1369 * .497
1370 */
1371 void ppc_opc_fmaddsx()
1372 {
1373 int frD, frA, frB, frC;
1374 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1375 ppc_double A, B, C, D;
1376 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1377 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1378 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1379 ppc_fpu_mul_add(D, A, C, B);
1380 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1381 if (gCPU.current_opc & PPC_OPC_Rc) {
1382 // update cr1 flags
1383 PPC_FPU_ERR("fmadds.\n");
1384 }
1385 }
// JIT hook for fmadds. No native code is generated here: the interpreter routine
// ppc_opc_fmaddsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1386 JITCFlow ppc_opc_gen_fmaddsx()
1387 {
1388 ppc_opc_gen_interpret(ppc_opc_fmaddsx);
1389 return flowEndBlock;
1390 }
1391 /*
1392 * fmrx Floating Move Register
1393 * .498
1394 */
1395 void ppc_opc_fmrx()
1396 {
1397 int frD, rA, frB;
1398 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB);
1399 PPC_OPC_ASSERT(rA==0);
1400 gCPU.fpr[frD] = gCPU.fpr[frB];
1401 if (gCPU.current_opc & PPC_OPC_Rc) {
1402 // update cr1 flags
1403 PPC_FPU_ERR("fmr.\n");
1404 }
1405 }
1406 JITCFlow ppc_opc_gen_fmrx()
1407 {
1408 int frD, frA, frB;
1409 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1410 if (frD != frB) {
1411 JitcFloatReg a = jitcGetClientFloatRegisterMapping(frB);
1412 if (a == JITC_FLOAT_REG_NONE) {
1413 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1414 if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
1415 NativeReg bu = jitcGetClientRegister(PPC_FPR_U(frB));
1416 NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1417 NativeReg du = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1418 NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1419 asmALU(X86_MOV, du, bu);
1420 asmALU(X86_MOV, dl, bl);
1421 } else {
1422 jitcInvalidateClientRegisterForFloat(frD);
1423 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1424 if (d == JITC_FLOAT_REG_NONE) {
1425 d = jitcFloatRegisterDup(a);
1426 jitcMapClientFloatRegisterDirty(frD, d);
1427 } else {
1428 jitcFloatRegisterXCHGToFront(a);
1429 asmFST(jitcFloatRegisterToNative(d));
1430 jitcFloatRegisterDirty(d);
1431 }
1432 }
1433 }
1434 if (gJITC.current_opc & PPC_OPC_Rc) {
1435 // update cr1 flags
1436 ppc_opc_gen_update_cr1("fabs.\n");
1437 }
1438 return flowContinue;
1439 }
1440 /*
1441 * fmsubx Floating Multiply-Subtract (Double-Precision)
1442 * .499
1443 */
1444 void ppc_opc_fmsubx()
1445 {
1446 int frD, frA, frB, frC;
1447 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1448 ppc_double A, B, C, D;
1449 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1450 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1451 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1452 B.s ^= 1;
1453 ppc_fpu_mul_add(D, A, C, B);
1454 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1455 if (gCPU.current_opc & PPC_OPC_Rc) {
1456 // update cr1 flags
1457 PPC_FPU_ERR("fmsub.\n");
1458 }
1459 }
1460 JITCFlow ppc_opc_gen_fmsubx()
1461 {
1462 int frD, frA, frB, frC;
1463 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1464 ppc_opc_gen_ternary_floatop(X86_FSUB, X86_FSUBR, false, frD, frA, frC, frB);
1465 if (gJITC.current_opc & PPC_OPC_Rc) {
1466 // update cr1 flags
1467 ppc_opc_gen_update_cr1("fmsub.\n");
1468 }
1469 return flowContinue;
1470 }
1471 /*
1472 * fmsubsx Floating Multiply-Subtract Single
1473 * .500
1474 */
1475 void ppc_opc_fmsubsx()
1476 {
1477 int frD, frA, frB, frC;
1478 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1479 ppc_double A, B, C, D;
1480 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1481 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1482 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1483 B.s ^= 1;
1484 ppc_fpu_mul_add(D, A, C, B);
1485 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1486 if (gCPU.current_opc & PPC_OPC_Rc) {
1487 // update cr1 flags
1488 PPC_FPU_ERR("fmsubs.\n");
1489 }
1490 }
// JIT hook for fmsubs. No native code is generated here: the interpreter routine
// ppc_opc_fmsubsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1491 JITCFlow ppc_opc_gen_fmsubsx()
1492 {
1493 ppc_opc_gen_interpret(ppc_opc_fmsubsx);
1494 return flowEndBlock;
1495 }
1496 /*
1497 * fmulx Floating Multiply (Double-Precision)
1498 * .501
1499 */
1500 void ppc_opc_fmulx()
1501 {
1502 int frD, frA, frB, frC;
1503 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1504 PPC_OPC_ASSERT(frB==0);
1505 ppc_double A, C, D;
1506 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1507 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1508 if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1509 || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1510 gCPU.fpscr |= FPSCR_VXIMZ;
1511 }
1512 ppc_fpu_mul(D, A, C);
1513 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1514 if (gCPU.current_opc & PPC_OPC_Rc) {
1515 // update cr1 flags
1516 PPC_FPU_ERR("fmul.\n");
1517 }
1518 }
1519 JITCFlow ppc_opc_gen_fmulx()
1520 {
1521 #ifdef JITC
1522 int frD, frA, frB, frC;
1523 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1524 PPC_OPC_ASSERT(frB==0);
1525 ppc_opc_gen_binary_floatop(X86_FMUL, X86_FMUL, frD, frA, frC);
1526 if (gJITC.current_opc & PPC_OPC_Rc) {
1527 // update cr1 flags
1528 ppc_opc_gen_update_cr1("fmul.\n");
1529 }
1530 return flowContinue;
1531 #else
1532 ppc_opc_gen_interpret(ppc_opc_fmulx);
1533 return flowEndBlock;
1534 #endif
1535 }
1536 /*
1537 * fmulsx Floating Multiply Single
1538 * .502
1539 */
1540 void ppc_opc_fmulsx()
1541 {
1542 int frD, frA, frB, frC;
1543 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1544 PPC_OPC_ASSERT(frB==0);
1545 ppc_double A, C, D;
1546 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1547 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1548 if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1549 || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1550 gCPU.fpscr |= FPSCR_VXIMZ;
1551 }
1552 ppc_fpu_mul(D, A, C);
1553 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1554 if (gCPU.current_opc & PPC_OPC_Rc) {
1555 // update cr1 flags
1556 PPC_FPU_ERR("fmuls.\n");
1557 }
1558 }
// JIT hook for fmuls. No native code is generated here: the interpreter routine
// ppc_opc_fmulsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1559 JITCFlow ppc_opc_gen_fmulsx()
1560 {
1561 ppc_opc_gen_interpret(ppc_opc_fmulsx);
1562 return flowEndBlock;
1563 }
1564 /*
1565 * fnabsx Floating Negative Absolute Value
1566 * .503
1567 */
1568 void ppc_opc_fnabsx()
1569 {
1570 int frD, frA, frB;
1571 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1572 PPC_OPC_ASSERT(frA==0);
1573 gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT;
1574 if (gCPU.current_opc & PPC_OPC_Rc) {
1575 // update cr1 flags
1576 PPC_FPU_ERR("fnabs.\n");
1577 }
1578 }
// JIT code generator for fnabs. Two paths, chosen by whether frB currently has a
// float register mapping: an integer path that ORs 0x80000000 into the upper word,
// or an x87 path (FABS through ppc_opc_gen_unary_floatop, followed by FCHS).
1579 JITCFlow ppc_opc_gen_fnabsx()
1580 {
1581 int frD, frA, frB;
1582 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1583 if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
// Integer path: any float mapping of the destination is invalidated first.
1584 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1585 if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
// The OR emitted below changes the host flags.
1586 jitcClobberCarryAndFlags();
1587 if (frD != frB) {
// Copy both words of frB to frD, then set the top bit of the upper word.
1588 NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
1589 NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1590 NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1591 NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1592 asmALU(X86_MOV, dh, bh);
1593 asmALU(X86_MOV, dl, bl);
1594 asmALU(X86_OR, dh, 0x80000000);
1595 } else {
// In place: only the upper word needs to change.
1596 NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
1597 asmALU(X86_OR, b, 0x80000000);
1598 }
1599 } else {
// x87 path: |x| followed by a sign change.
// NOTE(review): assumes ppc_opc_gen_unary_floatop leaves the result in ST(0) -- confirm.
1600 ppc_opc_gen_unary_floatop(FABS, frD, frB);
1601 asmFSimple(FCHS);
1602 }
1603 if (gJITC.current_opc & PPC_OPC_Rc) {
1604 // update cr1 flags
1605 ppc_opc_gen_update_cr1("fnabs.\n");
1606 }
1607 return flowContinue;
1608 }
1609 /*
1610 * fnegx Floating Negate
1611 * .504
1612 */
1613 void ppc_opc_fnegx()
1614 {
1615 int frD, frA, frB;
1616 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1617 PPC_OPC_ASSERT(frA==0);
1618 gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT;
1619 if (gCPU.current_opc & PPC_OPC_Rc) {
1620 // update cr1 flags
1621 PPC_FPU_ERR("fneg.\n");
1622 }
1623 }
// JIT code generator for fneg. Two paths, chosen by whether frB currently has a
// float register mapping: an integer path that XORs 0x80000000 into the upper word,
// or an x87 path (FCHS through ppc_opc_gen_unary_floatop).
1624 JITCFlow ppc_opc_gen_fnegx()
1625 {
1626 int frD, frA, frB;
1627 PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
1628 if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
// Integer path: any float mapping of the destination is invalidated first.
1629 JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
1630 if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
// The XOR emitted below changes the host flags.
1631 jitcClobberCarryAndFlags();
1632 if (frD != frB) {
// Copy both words of frB to frD, then flip the top bit of the upper word.
1633 NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
1634 NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
1635 NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
1636 NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
1637 asmALU(X86_MOV, dh, bh);
1638 asmALU(X86_MOV, dl, bl);
1639 asmALU(X86_XOR, dh, 0x80000000);
1640 } else {
// In place: only the upper word needs to change.
1641 NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
1642 asmALU(X86_XOR, b, 0x80000000);
1643 }
1644 } else {
// x87 path: a single sign change.
1645 ppc_opc_gen_unary_floatop(FCHS, frD, frB);
1646 }
1647 if (gJITC.current_opc & PPC_OPC_Rc) {
1648 // update cr1 flags
1649 ppc_opc_gen_update_cr1("fneg.\n");
1650 }
1651 return flowContinue;
1652 }
1653 /*
1654 * fnmaddx Floating Negative Multiply-Add (Double-Precision)
1655 * .505
1656 */
1657 void ppc_opc_fnmaddx()
1658 {
1659 int frD, frA, frB, frC;
1660 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1661 ppc_double A, B, C, D;
1662 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1663 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1664 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1665 ppc_fpu_mul_add(D, A, C, B);
1666 D.s ^= 1;
1667 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1668 if (gCPU.current_opc & PPC_OPC_Rc) {
1669 // update cr1 flags
1670 PPC_FPU_ERR("fnmadd.\n");
1671 }
1672 }
1673 JITCFlow ppc_opc_gen_fnmaddx()
1674 {
1675 int frD, frA, frB, frC;
1676 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1677 ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, true, frD, frA, frC, frB);
1678 if (gJITC.current_opc & PPC_OPC_Rc) {
1679 // update cr1 flags
1680 ppc_opc_gen_update_cr1("fnmadd.\n");
1681 }
1682 return flowContinue;
1683 }
1684 /*
1685 * fnmaddsx Floating Negative Multiply-Add Single
1686 * .506
1687 */
1688 void ppc_opc_fnmaddsx()
1689 {
1690 int frD, frA, frB, frC;
1691 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1692 ppc_double A, B, C, D;
1693 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1694 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1695 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1696 ppc_fpu_mul_add(D, A, C, B);
1697 D.s ^= 1;
1698 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1699 if (gCPU.current_opc & PPC_OPC_Rc) {
1700 // update cr1 flags
1701 PPC_FPU_ERR("fnmadds.\n");
1702 }
1703 }
// JIT hook for fnmadds. No native code is generated here: the interpreter routine
// ppc_opc_fnmaddsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1704 JITCFlow ppc_opc_gen_fnmaddsx()
1705 {
1706 ppc_opc_gen_interpret(ppc_opc_fnmaddsx);
1707 return flowEndBlock;
1708 }
1709 /*
1710 * fnmsubx Floating Negative Multiply-Subtract (Double-Precision)
1711 * .507
1712 */
1713 void ppc_opc_fnmsubx()
1714 {
1715 int frD, frA, frB, frC;
1716 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1717 ppc_double A, B, C, D;
1718 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1719 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1720 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1721 B.s ^= 1;
1722 ppc_fpu_mul_add(D, A, C, B);
1723 D.s ^= 1;
1724 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1725 if (gCPU.current_opc & PPC_OPC_Rc) {
1726 // update cr1 flags
1727 PPC_FPU_ERR("fnmsub.\n");
1728 }
1729 }
1730 JITCFlow ppc_opc_gen_fnmsubx()
1731 {
1732 int frD, frA, frB, frC;
1733 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1734 ppc_opc_gen_ternary_floatop(X86_FSUBR, X86_FSUB, false, frD, frA, frC, frB);
1735 if (gJITC.current_opc & PPC_OPC_Rc) {
1736 // update cr1 flags
1737 ppc_opc_gen_update_cr1("fnmsub.\n");
1738 }
1739 return flowContinue;
1740 }
1741 /*
1742 * fnmsubsx Floating Negative Multiply-Subtract Single
1743 * .508
1744 */
1745 void ppc_opc_fnmsubsx()
1746 {
1747 int frD, frA, frB, frC;
1748 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1749 ppc_double A, B, C, D;
1750 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1751 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1752 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1753 B.s ^= 1;
1754 ppc_fpu_mul_add(D, A, C, B);
1755 D.s ^= 1;
1756 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1757 if (gCPU.current_opc & PPC_OPC_Rc) {
1758 // update cr1 flags
1759 PPC_FPU_ERR("fnmsubs.\n");
1760 }
1761 }
// JIT hook for fnmsubs. No native code is generated here: the interpreter routine
// ppc_opc_fnmsubsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1762 JITCFlow ppc_opc_gen_fnmsubsx()
1763 {
1764 ppc_opc_gen_interpret(ppc_opc_fnmsubsx);
1765 return flowEndBlock;
1766 }
1767 /*
1768 * fresx Floating Reciprocal Estimate Single
1769 * .509
1770 */
1771 void ppc_opc_fresx()
1772 {
1773 int frD, frA, frB, frC;
1774 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1775 PPC_OPC_ASSERT(frA==0 && frC==0);
1776 if (gCPU.current_opc & PPC_OPC_Rc) {
1777 // update cr1 flags
1778 PPC_FPU_ERR("fres.\n");
1779 }
1780 PPC_FPU_ERR("fres\n");
1781 }
// JIT hook for fres. No native code is generated here: the interpreter routine
// ppc_opc_fresx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1782 JITCFlow ppc_opc_gen_fresx()
1783 {
1784 ppc_opc_gen_interpret(ppc_opc_fresx);
1785 return flowEndBlock;
1786 }
1787 /*
1788 * frspx Floating Round to Single
1789 * .511
1790 */
1791 void ppc_opc_frspx()
1792 {
1793 int frD, frA, frB;
1794 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1795 PPC_OPC_ASSERT(frA==0);
1796 ppc_double B;
1797 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1798 gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]);
1799 if (gCPU.current_opc & PPC_OPC_Rc) {
1800 // update cr1 flags
1801 PPC_FPU_ERR("frsp.\n");
1802 }
1803 }
// JIT hook for frsp. No native code is generated here: the interpreter routine
// ppc_opc_frspx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1804 JITCFlow ppc_opc_gen_frspx()
1805 {
1806 ppc_opc_gen_interpret(ppc_opc_frspx);
1807 return flowEndBlock;
1808 }
1809 /*
1810 * frsqrtex Floating Reciprocal Square Root Estimate
1811 * .512
1812 */
1813 void ppc_opc_frsqrtex()
1814 {
1815 int frD, frA, frB, frC;
1816 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1817 PPC_OPC_ASSERT(frA==0 && frC==0);
1818 ppc_double B;
1819 ppc_double D;
1820 ppc_double E;
1821 ppc_double Q;
1822 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1823 ppc_fpu_sqrt(Q, B);
1824 E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL;
1825 ppc_fpu_div(D, E, Q);
1826 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1827 if (gCPU.current_opc & PPC_OPC_Rc) {
1828 // update cr1 flags
1829 PPC_FPU_ERR("frsqrte.\n");
1830 }
1831 }
// JIT code generator for frsqrte: emits FSQRT on frB into frD via
// ppc_opc_gen_unary_floatop, then loads 1.0 (FLD1) and emits a reverse
// divide-and-pop against frD's float register.
1832 JITCFlow ppc_opc_gen_frsqrtex()
1833 {
1834 int frD, frA, frB, frC;
1835 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1836 PPC_OPC_ASSERT(frA==0 && frC==0);
1837 ppc_opc_gen_unary_floatop(FSQRT, frD, frB);
// FLD1 needs a free slot: when the tracked depth is already 8, pop one entry
// first (frD's register is passed to jitcPopFloatStack, presumably to keep it -- confirm).
1838 if (gJITC.nativeFloatTOP == 8) {
1839 jitcPopFloatStack(jitcGetClientFloatRegisterMapping(frD), JITC_FLOAT_REG_NONE);
1840 }
// Account for the FLD1 push ...
1841 gJITC.nativeFloatTOP++;
1842 asmFSimple(FLD1);
1843 asmFArithP_STi(X86_FDIVR, jitcFloatRegisterToNative(jitcGetClientFloatRegisterMapping(frD)));
// ... and for the pop done by the divide-and-pop form.
1844 gJITC.nativeFloatTOP--;
1845 if (gJITC.current_opc & PPC_OPC_Rc) {
1846 // update cr1 flags
1847 ppc_opc_gen_update_cr1("frsqrte.\n");
1848 }
1849 return flowContinue;
1850 }
1851 /*
1852 * fselx Floating Select
1853 * .514
1854 */
1855 void ppc_opc_fselx()
1856 {
1857 int frD, frA, frB, frC;
1858 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1859 ppc_double A;
1860 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1861 if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) {
1862 gCPU.fpr[frD] = gCPU.fpr[frB];
1863 } else {
1864 gCPU.fpr[frD] = gCPU.fpr[frC];
1865 }
1866 if (gCPU.current_opc & PPC_OPC_Rc) {
1867 // update cr1 flags
1868 PPC_FPU_ERR("fsel.\n");
1869 }
1870 }
// JIT hook for fsel. No native code is generated here: the interpreter routine
// ppc_opc_fselx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1871 JITCFlow ppc_opc_gen_fselx()
1872 {
1873 ppc_opc_gen_interpret(ppc_opc_fselx);
1874 return flowEndBlock;
1875 }
1876 /*
1877 * fsqrtx Floating Square Root (Double-Precision)
1878 * .515
1879 */
1880 void ppc_opc_fsqrtx()
1881 {
1882 int frD, frA, frB, frC;
1883 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1884 PPC_OPC_ASSERT(frA==0 && frC==0);
1885 ppc_double B;
1886 ppc_double D;
1887 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1888 ppc_fpu_sqrt(D, B);
1889 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1890 if (gCPU.current_opc & PPC_OPC_Rc) {
1891 // update cr1 flags
1892 PPC_FPU_ERR("fsqrt.\n");
1893 }
1894 }
1895 JITCFlow ppc_opc_gen_fsqrtx()
1896 {
1897 int frD, frA, frB, frC;
1898 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1899 PPC_OPC_ASSERT(frA==0 && frC==0);
1900 ppc_opc_gen_unary_floatop(FSQRT, frD, frB);
1901 if (gJITC.current_opc & PPC_OPC_Rc) {
1902 // update cr1 flags
1903 ppc_opc_gen_update_cr1("fsqrt.\n");
1904 }
1905 return flowContinue;
1906 }
1907 /*
1908 * fsqrtsx Floating Square Root Single
1909 * .515
1910 */
1911 void ppc_opc_fsqrtsx()
1912 {
1913 int frD, frA, frB, frC;
1914 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1915 PPC_OPC_ASSERT(frA==0 && frC==0);
1916 if (gCPU.current_opc & PPC_OPC_Rc) {
1917 // update cr1 flags
1918 PPC_FPU_ERR("fsqrts.\n");
1919 }
1920 PPC_FPU_ERR("fsqrts\n");
1921 }
// JIT hook for fsqrts. No native code is generated here: the interpreter routine
// ppc_opc_fsqrtsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1922 JITCFlow ppc_opc_gen_fsqrtsx()
1923 {
1924 ppc_opc_gen_interpret(ppc_opc_fsqrtsx);
1925 return flowEndBlock;
1926 }
1927 /*
1928 * fsubx Floating Subtract (Double-Precision)
1929 * .517
1930 */
1931 void ppc_opc_fsubx()
1932 {
1933 int frD, frA, frB, frC;
1934 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1935 PPC_OPC_ASSERT(frC==0);
1936 ppc_double A, B, D;
1937 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1938 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1939 if (B.type != ppc_fpr_NaN) {
1940 B.s ^= 1;
1941 }
1942 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1943 gCPU.fpscr |= FPSCR_VXISI;
1944 }
1945 ppc_fpu_add(D, A, B);
1946 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1947 if (gCPU.current_opc & PPC_OPC_Rc) {
1948 // update cr1 flags
1949 PPC_FPU_ERR("fsub.\n");
1950 }
1951 }
// JIT hook for fsub. With JITC defined it generates native code through
// ppc_opc_gen_binary_floatop (X86_FSUB / X86_FSUBR), then calls
// jitcFloatRegisterClobberAll as a workaround, and returns flowContinue.
// Without JITC it hands ppc_opc_fsubx to ppc_opc_gen_interpret and returns
// flowEndBlock.
1952 JITCFlow ppc_opc_gen_fsubx()
1953 {
1954 #ifdef JITC
1955 int frD, frA, frB, frC;
1956 PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
1957 PPC_OPC_ASSERT(frC==0);
1958 ppc_opc_gen_binary_floatop(X86_FSUB, X86_FSUBR, frD, frA, frB);
1959 /*
1960 * FIXME: This solves a floating point bug.
1961 * I have no idea why.
1962 */
1963 jitcFloatRegisterClobberAll();
1964 if (gJITC.current_opc & PPC_OPC_Rc) {
1965 // update cr1 flags
1966 ppc_opc_gen_update_cr1("fsub.\n");
1967 }
1968 return flowContinue;
1969 #else
1970 ppc_opc_gen_interpret(ppc_opc_fsubx);
1971 return flowEndBlock;
1972 #endif
1973 }
1974 /*
1975 * fsubsx Floating Subtract Single
1976 * .518
1977 */
1978 void ppc_opc_fsubsx()
1979 {
1980 int frD, frA, frB, frC;
1981 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1982 PPC_OPC_ASSERT(frC==0);
1983 ppc_double A, B, D;
1984 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1985 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1986 if (B.type != ppc_fpr_NaN) {
1987 B.s ^= 1;
1988 }
1989 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1990 gCPU.fpscr |= FPSCR_VXISI;
1991 }
1992 ppc_fpu_add(D, A, B);
1993 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1994 if (gCPU.current_opc & PPC_OPC_Rc) {
1995 // update cr1 flags
1996 PPC_FPU_ERR("fsubs.\n");
1997 }
1998 }
// JIT hook for fsubs. No native code is generated here: the interpreter routine
// ppc_opc_fsubsx is handed to ppc_opc_gen_interpret (defined elsewhere; presumably
// emits a call to it -- confirm) and flowEndBlock is returned.
1999 JITCFlow ppc_opc_gen_fsubsx()
2000 {
2001 ppc_opc_gen_interpret(ppc_opc_fsubsx);
2002 return flowEndBlock;
2003 }
2004

  ViewVC Help
Powered by ViewVC 1.1.26