/[pearpc]/src/cpu/cpu_generic/ppc_fpu.cc
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/cpu/cpu_generic/ppc_fpu.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (12 years, 2 months ago) by dpavlin
File size: 34024 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * ppc_fpu.cc
4 *
5 * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
6 * Copyright (C) 2003 Stefan Weyergraf
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include "debug/tracers.h"
23 #include "ppc_cpu.h"
24 #include "ppc_dec.h"
25 #include "ppc_fpu.h"
26
27 // .121
28
29
30 #define PPC_FPR_TYPE2(a,b) (((a)<<8)|(b))
31 inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
32 {
33 switch (PPC_FPR_TYPE2(a.type, b.type)) {
34 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
35 int diff = a.e - b.e;
36 if (diff<0) {
37 diff = -diff;
38 if (diff <= 56) {
39 a.m >>= diff;
40 } else if (a.m != 0) {
41 a.m = 1;
42 } else {
43 a.m = 0;
44 }
45 res.e = b.e;
46 } else {
47 if (diff <= 56) {
48 b.m >>= diff;
49 } else if (b.m != 0) {
50 b.m = 1;
51 } else {
52 b.m = 0;
53 }
54 res.e = a.e;
55 }
56 res.type = ppc_fpr_norm;
57 if (a.s == b.s) {
58 res.s = a.s;
59 res.m = a.m + b.m;
60 if (res.m & (1ULL<<56)) {
61 res.m >>= 1;
62 res.e++;
63 }
64 } else {
65 res.s = a.s;
66 res.m = a.m - b.m;
67 if (!res.m) {
68 if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
69 res.s |= b.s;
70 } else {
71 res.s &= b.s;
72 }
73 res.type = ppc_fpr_zero;
74 } else {
75 if ((sint64)res.m < 0) {
76 res.m = b.m - a.m;
77 res.s = b.s;
78 }
79 diff = ppc_fpu_normalize(res) - 8;
80 res.e -= diff;
81 res.m <<= diff;
82 }
83 }
84 break;
85 }
86 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
87 res.s = a.s;
88 res.type = ppc_fpr_NaN;
89 break;
90 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
91 res.e = a.e;
92 // fall-thru
93 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
94 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
95 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
96 res.s = a.s;
97 res.m = a.m;
98 res.type = a.type;
99 break;
100 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
101 res.e = b.e;
102 // fall-thru
103 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
104 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
105 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
106 res.s = b.s;
107 res.m = b.m;
108 res.type = b.type;
109 break;
110 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
111 if (a.s != b.s) {
112 // +oo + -oo == NaN
113 res.s = a.s ^ b.s;
114 res.type = ppc_fpr_NaN;
115 break;
116 }
117 // fall-thru
118 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
119 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
120 res.s = a.s;
121 res.type = a.type;
122 break;
123 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
124 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
125 res.s = b.s;
126 res.type = b.type;
127 break;
128 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
129 // round bla
130 res.type = ppc_fpr_zero;
131 res.s = a.s && b.s;
132 break;
133 }
134 }
135
136 inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
137 {
138 if (exp >= 64) {
139 q.m1 = q.m0;
140 q.m0 = 0;
141 exp -= 64;
142 }
143 uint64 t = q.m0 & ((1ULL<<exp)-1);
144 q.m0 >>= exp;
145 q.m1 >>= exp;
146 q.m1 |= t<<(64-exp);
147 }
148
149 inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
150 {
151 if (exp >= 64) {
152 q.m0 = q.m1;
153 q.m1 = 0;
154 exp -= 64;
155 }
156 uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
157 q.m0 <<= exp;
158 q.m1 <<= exp;
159 q.m0 |= t;
160 }
161
162 inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
163 {
164 res.m1 = a.m1+b.m1;
165 if (res.m1 < a.m1) {
166 res.m0 = a.m0+b.m0+1;
167 } else {
168 res.m0 = a.m0+b.m0;
169 }
170 }
171
172 inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
173 {
174 res.m1 = a.m1-b.m1;
175 if (a.m1 < b.m1) {
176 res.m0 = a.m0-b.m0-1;
177 } else {
178 res.m0 = a.m0-b.m0;
179 }
180 }
181
182 // res has 107 significant bits. a, b have 106 significant bits each.
183 inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b)
184 {
185 // treat as 107 bit mantissa
186 if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1);
187 if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1);
188 switch (PPC_FPR_TYPE2(a.type, b.type)) {
189 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
190 int diff = a.e - b.e;
191 if (diff < 0) {
192 diff = -diff;
193 if (diff <= 107) {
194 // FIXME: may set x_prime
195 ppc_fpu_quadro_mshr(a, diff);
196 } else if (a.m0 || a.m1) {
197 a.m0 = 0;
198 a.m1 = 1;
199 } else {
200 a.m0 = 0;
201 a.m1 = 0;
202 }
203 res.e = b.e;
204 } else {
205 if (diff <= 107) {
206 // FIXME: may set x_prime
207 ppc_fpu_quadro_mshr(b, diff);
208 } else if (b.m0 || b.m1) {
209 b.m0 = 0;
210 b.m1 = 1;
211 } else {
212 b.m0 = 0;
213 b.m1 = 0;
214 }
215 res.e = a.e;
216 }
217 res.type = ppc_fpr_norm;
218 if (a.s == b.s) {
219 res.s = a.s;
220 ppc_fpu_add_quadro_m(res, a, b);
221 int X_prime = res.m1 & 1;
222 if (res.m0 & (1ULL<<(107-64))) {
223 ppc_fpu_quadro_mshr(res, 1);
224 res.e++;
225 }
226 // res = [107]
227 res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime;
228 } else {
229 res.s = a.s;
230 int cmp;
231 if (a.m0 < b.m0) {
232 cmp = -1;
233 } else if (a.m0 > b.m0) {
234 cmp = +1;
235 } else {
236 if (a.m1 < b.m1) {
237 cmp = -1;
238 } else if (a.m1 > b.m1) {
239 cmp = +1;
240 } else {
241 cmp = 0;
242 }
243 }
244 if (!cmp) {
245 if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
246 res.s |= b.s;
247 } else {
248 res.s &= b.s;
249 }
250 res.type = ppc_fpr_zero;
251 } else {
252 if (cmp < 0) {
253 ppc_fpu_sub_quadro_m(res, b, a);
254 res.s = b.s;
255 } else {
256 ppc_fpu_sub_quadro_m(res, a, b);
257 }
258 diff = ppc_fpu_normalize_quadro(res) - (128-107);
259 int X_prime = res.m1 & 1;
260 res.m1 &= 0xfffffffffffffffeULL;
261 ppc_fpu_quadro_mshl(res, diff);
262 res.e -= diff;
263 res.m1 |= X_prime;
264 }
265 // res = [107]
266 }
267 break;
268 }
269 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
270 res.s = a.s;
271 res.type = ppc_fpr_NaN;
272 break;
273 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
274 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
275 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
276 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
277 res.e = a.e;
278 res.s = a.s;
279 res.m0 = a.m0;
280 res.m1 = a.m1;
281 res.type = a.type;
282 break;
283 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
284 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
285 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
286 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
287 res.e = b.e;
288 res.s = b.s;
289 res.m0 = b.m0;
290 res.m1 = b.m1;
291 res.type = b.type;
292 break;
293 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
294 if (a.s != b.s) {
295 // +oo + -oo == NaN
296 res.s = a.s ^ b.s;
297 res.type = ppc_fpr_NaN;
298 break;
299 }
300 // fall-thru
301 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
302 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
303 res.s = a.s;
304 res.type = a.type;
305 break;
306 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
307 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
308 res.s = b.s;
309 res.type = b.type;
310 break;
311 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
312 // round bla
313 res.type = ppc_fpr_zero;
314 res.s = a.s && b.s;
315 break;
316 }
317 }
318
319 inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
320 {
321 carry = (a+b < a) ? 1 : 0;
322 a += b;
323 }
324
325 // 'res' has 56 significant bits on return, a + b have 56 significant bits each
326 inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b)
327 {
328 res.s = a.s ^ b.s;
329 switch (PPC_FPR_TYPE2(a.type, b.type)) {
330 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
331 res.type = ppc_fpr_norm;
332 res.e = a.e + b.e;
333 // printf("new exp: %d\n", res.e);
334 // ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m);
335 uint64 fH, fM1, fM2, fL;
336 fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
337 fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
338 fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
339 fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
340 // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
341
342 // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
343 uint64 rL, rH;
344 rL = fL; // rL = rH = [63,64]
345 rH = fH; // rH = fH = [47,48]
346 uint64 split;
347 split = fM1 + fM2;
348 uint64 carry;
349 ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
350 rH += carry; // rH = [0 .. 2^48]
351 rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
352
353 // res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55]
354 // [---------------------------------------------------------]
355 // bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ]
356 // [---------------------------------------------------------]
357 // [15 bits zero | 49 bits rH | 8 most sign.bits rL ]
358 res.m = rH << 9;
359 res.m |= rL >> (64-9);
360 // res.m = [58]
361
362 // ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
363 if (res.m & (1ULL << 57)) {
364 res.m >>= 2;
365 res.e += 2;
366 } else if (res.m & (1ULL << 56)) {
367 res.m >>= 1;
368 res.e++;
369 }
370 // res.m = [56]
371 break;
372 }
373 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
374 res.type = a.type;
375 res.e = a.e;
376 break;
377 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
378 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
379 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
380 res.s = a.s;
381 // fall-thru
382 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
383 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
384 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
385 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
386 res.type = a.type;
387 break;
388 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
389 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
390 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
391 res.s = b.s;
392 // fall-thru
393 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
394 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
395 res.type = b.type;
396 break;
397 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
398 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
399 res.type = ppc_fpr_NaN;
400 break;
401 }
402 }
403
404 // 'res' has 'prec' significant bits on return, a + b have 56 significant bits each
405 // for 111 >= prec >= 64
406 inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec)
407 {
408 res.s = a.s ^ b.s;
409 switch (PPC_FPR_TYPE2(a.type, b.type)) {
410 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
411 res.type = ppc_fpr_norm;
412 res.e = a.e + b.e;
413 // printf("new exp: %d\n", res.e);
414 // ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m);
415 uint64 fH, fM1, fM2, fL;
416 fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64]
417 fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56]
418 fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56]
419 fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48]
420 // ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL);
421
422 // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
423 uint64 rL, rH;
424 rL = fL; // rL = rH = [63,64]
425 rH = fH; // rH = fH = [47,48]
426 uint64 split;
427 split = fM1 + fM2;
428 uint64 carry;
429 ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64]
430 rH += carry; // rH = [0 .. 2^48]
431 rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set
432
433 // res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0]
434 // [-----------------------------------------------------------]
435 // log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ]
436 // [-----------------------------------------------------------]
437 // [ 15 bits zero | 49 bits rH | 64 bits rL ]
438 res.m0 = rH;
439 res.m1 = rL;
440 // res.m0|res.m1 = [111,112,113]
441
442 // ht_printf("res = %016qx%016qx\n", res.m0, res.m1);
443 if (res.m0 & (1ULL << 48)) {
444 ppc_fpu_quadro_mshr(res, 2+(111-prec));
445 res.e += 2;
446 } else if (res.m0 & (1ULL << 47)) {
447 ppc_fpu_quadro_mshr(res, 1+(111-prec));
448 res.e += 1;
449 } else {
450 ppc_fpu_quadro_mshr(res, 111-prec);
451 }
452 // res.m0|res.m1 = [prec]
453 break;
454 }
455 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
456 res.type = a.type;
457 res.e = a.e;
458 break;
459 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
460 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
461 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
462 res.s = a.s;
463 // fall-thru
464 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
465 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
466 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
467 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
468 res.type = a.type;
469 break;
470 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
471 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
472 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
473 res.s = b.s;
474 // fall-thru
475 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
476 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
477 res.type = b.type;
478 break;
479 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
480 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
481 res.type = ppc_fpr_NaN;
482 break;
483 }
484 }
485
486 // calculate one of these:
487 // + m1 * m2 + s
488 // + m1 * m2 - s
489 // - m1 * m2 + s
490 // - m1 * m2 - s
491 // using a 106 bit accumulator
492 //
493 // .752
494 //
495 // FIXME: There is a bug in this code that shows up in Mac OS X Finder fwd/bwd
496 // button: the top line is not rendered correctly. This works with the jitc_x86
497 // FPU however...
498 inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2,
499 ppc_double &s)
500 {
501 ppc_quadro p;
502 /* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e,
503 ppc_fpu_get_fpr_type(m1.type));
504 ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e,
505 ppc_fpu_get_fpr_type(m2.type));*/
506 // create product with 106 significant bits
507 ppc_fpu_mul_quadro(p, m1, m2, 106);
508 /* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e,
509 ppc_fpu_get_fpr_type(p.type));*/
510 // convert s into ppc_quadro
511 /* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e,
512 ppc_fpu_get_fpr_type(s.type));*/
513 ppc_quadro q;
514 q.e = s.e;
515 q.s = s.s;
516 q.type = s.type;
517 q.m0 = 0;
518 q.m1 = s.m;
519 // .. with 106 significant bits
520 ppc_fpu_quadro_mshl(q, 106-56);
521 /* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e,
522 ppc_fpu_get_fpr_type(q.type));*/
523 // now we must add p, q.
524 ppc_quadro x;
525 ppc_fpu_add_quadro(x, p, q);
526 // x = [107]
527 /* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e,
528 ppc_fpu_get_fpr_type(x.type));*/
529 res.type = x.type;
530 res.s = x.s;
531 res.e = x.e;
532 if (x.type == ppc_fpr_norm) {
533 res.m = x.m0 << 13; // 43 bits from m0
534 res.m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1
535 res.m |= x.m1 & 1; // X' bit from m1
536 }
537 /* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e,
538 ppc_fpu_get_fpr_type(res.type));*/
539 }
540
541 inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
542 {
543 res.s = a.s ^ b.s;
544 switch (PPC_FPR_TYPE2(a.type, b.type)) {
545 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
546 res.type = ppc_fpr_norm;
547 res.e = a.e - b.e;
548 res.m = 0;
549 uint64 am = a.m, bm = b.m;
550 uint i = 0;
551 while (am && (i<56)) {
552 res.m <<= 1;
553 if (am >= bm) {
554 res.m |= 1;
555 am -= bm;
556 }
557 am <<= 1;
558 // printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
559 i++;
560 }
561 res.m <<= 57-i;
562 if (res.m & (1ULL << 56)) {
563 res.m >>= 1;
564 } else {
565 res.e--;
566 }
567 // printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
568 break;
569 }
570 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
571 res.e = a.e;
572 // fall-thru
573 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
574 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
575 case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
576 res.s = a.s;
577 // fall-thru
578 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
579 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
580 res.type = a.type;
581 break;
582 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
583 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
584 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
585 res.s = b.s;
586 res.type = b.type;
587 break;
588 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
589 res.type = ppc_fpr_zero;
590 break;
591 case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
592 res.type = ppc_fpr_Inf;
593 break;
594 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
595 case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
596 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
597 case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
598 res.type = ppc_fpr_NaN;
599 break;
600 }
601 }
602
603 inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
604 {
605 switch (B.type) {
606 case ppc_fpr_norm:
607 if (B.s) {
608 D.type = ppc_fpr_NaN;
609 gCPU.fpscr |= FPSCR_VXSQRT;
610 break;
611 }
612 // D := 1/2(D_old + B/D_old)
613 D = B;
614 D.e /= 2;
615 for (int i=0; i<6; i++) {
616 ppc_double D_old = D;
617 ppc_double B_div_D_old;
618 ppc_fpu_div(B_div_D_old, B, D_old);
619 ppc_fpu_add(D, D_old, B_div_D_old);
620 D.e--;
621
622 /* uint64 e;
623 ppc_double E = D;
624 ppc_fpu_pack_double(E, e);
625 printf("%.20f\n", *(double *)&e);*/
626 }
627 break;
628 case ppc_fpr_zero:
629 D.type = ppc_fpr_zero;
630 D.s = B.s;
631 break;
632 case ppc_fpr_Inf:
633 if (B.s) {
634 D.type = ppc_fpr_NaN;
635 gCPU.fpscr |= FPSCR_VXSQRT;
636 } else {
637 D.type = ppc_fpr_Inf;
638 D.s = 0;
639 }
640 break;
641 case ppc_fpr_NaN:
642 D.type = ppc_fpr_NaN;
643 break;
644 }
645 }
646
647 void ppc_fpu_test()
648 {
649 ppc_double A, B, C;
650 double a, b, c;
651 A.type = B.type = ppc_fpr_norm;
652 A.s = 1;
653 A.e = 0;
654 A.m = 0;
655 A.m = ((1ULL<<56)-1)-((1ULL<<10)-1);
656 ht_printf("%qb\n", A.m);
657 B.s = 1;
658 B.e = 0;
659 B.m = 0;
660 B.m = ((1ULL<<56)-1)-((1ULL<<50)-1);
661 a = ppc_fpu_get_double(A);
662 b = ppc_fpu_get_double(B);
663 printf("%f + %f = \n", a, b);
664 ppc_fpu_add(C, A, B);
665 uint64 d;
666 uint32 s;
667 ppc_fpu_pack_double_as_single(C, d);
668 ht_printf("%064qb\n", d);
669 ppc_fpu_unpack_double(C, d);
670 ppc_fpu_pack_single(C, s);
671 ht_printf("single: %032b\n", s);
672 ppc_single Cs;
673 ppc_fpu_unpack_single(Cs, s);
674 ppc_fpu_single_to_double(Cs, C);
675 // ht_printf("%d\n", ppc_fpu_double_to_int(C));
676 c = ppc_fpu_get_double(C);
677 printf("%f\n", c);
678 }
679
680 /*
681 * a and b must not be NaNs
682 */
683 inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b)
684 {
685 if (a.type == ppc_fpr_zero) {
686 if (b.type == ppc_fpr_zero) return 2;
687 return (b.s) ? 4: 8;
688 }
689 if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4;
690 if (a.s != b.s) return (a.s) ? 8: 4;
691 if (a.e > b.e) return (a.s) ? 8: 4;
692 if (a.e < b.e) return (a.s) ? 4: 8;
693 if (a.m > b.m) return (a.s) ? 8: 4;
694 if (a.m < b.m) return (a.s) ? 4: 8;
695 return 2;
696 }
697
698 double ppc_fpu_get_double(uint64 d)
699 {
700 ppc_double dd;
701 ppc_fpu_unpack_double(dd, d);
702 return ppc_fpu_get_double(dd);
703 }
704
705 double ppc_fpu_get_double(ppc_double &d)
706 {
707 if (d.type == ppc_fpr_norm) {
708 double r = d.m;
709 for (int i=0; i<55; i++) {
710 r = r / 2.0;
711 }
712 if (d.e < 0) {
713 for (int i=0; i>d.e; i--) {
714 r = r / 2.0;
715 }
716 } else if (d.e > 0) {
717 for (int i=0; i<d.e; i++) {
718 r = r * 2.0;
719 }
720 }
721 if (d.s) r = -r;
722 return r;
723 } else {
724 return 0.0;
725 }
726 }
727
728 /***********************************************************************************
729 *
730 */
731
732
733 /*
734 * fabsx Floating Absolute Value
735 * .484
736 */
737 void ppc_opc_fabsx()
738 {
739 int frD, frA, frB;
740 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
741 PPC_OPC_ASSERT(frA==0);
742 gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT;
743 if (gCPU.current_opc & PPC_OPC_Rc) {
744 // update cr1 flags
745 PPC_FPU_ERR("fabs.\n");
746 }
747 }
748 /*
749 * faddx Floating Add (Double-Precision)
750 * .485
751 */
752 void ppc_opc_faddx()
753 {
754 int frD, frA, frB, frC;
755 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
756 PPC_OPC_ASSERT(frC==0);
757 ppc_double A, B, D;
758 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
759 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
760 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
761 gCPU.fpscr |= FPSCR_VXISI;
762 }
763 ppc_fpu_add(D, A, B);
764 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
765 if (gCPU.current_opc & PPC_OPC_Rc) {
766 // update cr1 flags
767 PPC_FPU_ERR("fadd.\n");
768 }
769 }
770 /*
771 * faddsx Floating Add Single
772 * .486
773 */
774 void ppc_opc_faddsx()
775 {
776 int frD, frA, frB, frC;
777 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
778 PPC_OPC_ASSERT(frC==0);
779 ppc_double A, B, D;
780 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
781 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
782 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
783 gCPU.fpscr |= FPSCR_VXISI;
784 }
785 ppc_fpu_add(D, A, B);
786 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
787 if (gCPU.current_opc & PPC_OPC_Rc) {
788 // update cr1 flags
789 PPC_FPU_ERR("fadds.\n");
790 }
791 }
792 /*
793 * fcmpo Floating Compare Ordered
794 * .488
795 */
796 static uint32 ppc_fpu_cmp_and_mask[8] = {
797 0xfffffff0,
798 0xffffff0f,
799 0xfffff0ff,
800 0xffff0fff,
801 0xfff0ffff,
802 0xff0fffff,
803 0xf0ffffff,
804 0x0fffffff,
805 };
806 void ppc_opc_fcmpo()
807 {
808 int crfD, frA, frB;
809 PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
810 crfD >>= 2;
811 ppc_double A, B;
812 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
813 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
814 uint32 cmp;
815 if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
816 gCPU.fpscr |= FPSCR_VXSNAN;
817 /*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC;
818 cmp = 1;
819 } else {
820 cmp = ppc_fpu_compare(A, B);
821 }
822 crfD = 7-crfD;
823 gCPU.fpscr &= ~0x1f000;
824 gCPU.fpscr |= (cmp << 12);
825 gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
826 gCPU.cr |= (cmp << (crfD * 4));
827 }
828 /*
829 * fcmpu Floating Compare Unordered
830 * .489
831 */
832 void ppc_opc_fcmpu()
833 {
834 int crfD, frA, frB;
835 PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
836 crfD >>= 2;
837 ppc_double A, B;
838 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
839 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
840 uint32 cmp;
841 if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
842 gCPU.fpscr |= FPSCR_VXSNAN;
843 cmp = 1;
844 } else {
845 cmp = ppc_fpu_compare(A, B);
846 }
847 crfD = 7-crfD;
848 gCPU.fpscr &= ~0x1f000;
849 gCPU.fpscr |= (cmp << 12);
850 gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
851 gCPU.cr |= (cmp << (crfD * 4));
852 }
853 /*
854 * fctiwx Floating Convert to Integer Word
855 * .492
856 */
857 void ppc_opc_fctiwx()
858 {
859 int frD, frA, frB;
860 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
861 PPC_OPC_ASSERT(frA==0);
862 ppc_double B;
863 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
864 gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
865 if (gCPU.current_opc & PPC_OPC_Rc) {
866 // update cr1 flags
867 PPC_FPU_ERR("fctiw.\n");
868 }
869 }
870 /*
871 * fctiwzx Floating Convert to Integer Word with Round toward Zero
872 * .493
873 */
874 void ppc_opc_fctiwzx()
875 {
876 int frD, frA, frB;
877 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
878 PPC_OPC_ASSERT(frA==0);
879 uint32 oldfpscr = gCPU.fpscr;
880 gCPU.fpscr &= ~3;
881 gCPU.fpscr |= 1;
882 ppc_double B;
883 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
884 gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
885 gCPU.fpscr = oldfpscr;
886 if (gCPU.current_opc & PPC_OPC_Rc) {
887 // update cr1 flags
888 PPC_FPU_ERR("fctiwz.\n");
889 }
890 }
891 /*
892 * fdivx Floating Divide (Double-Precision)
893 * .494
894 */
895 void ppc_opc_fdivx()
896 {
897 int frD, frA, frB, frC;
898 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
899 PPC_OPC_ASSERT(frC==0);
900 ppc_double A, B, D;
901 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
902 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
903 if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
904 gCPU.fpscr |= FPSCR_VXZDZ;
905 }
906 if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
907 gCPU.fpscr |= FPSCR_VXIDI;
908 }
909 if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
910 // FIXME::
911 gCPU.fpscr |= FPSCR_VXIDI;
912 }
913 ppc_fpu_div(D, A, B);
914 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
915 if (gCPU.current_opc & PPC_OPC_Rc) {
916 // update cr1 flags
917 PPC_FPU_ERR("fdiv.\n");
918 }
919 }
920 /*
921 * fdivsx Floating Divide Single
922 * .495
923 */
924 void ppc_opc_fdivsx()
925 {
926 int frD, frA, frB, frC;
927 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
928 PPC_OPC_ASSERT(frC==0);
929 ppc_double A, B, D;
930 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
931 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
932 if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
933 gCPU.fpscr |= FPSCR_VXZDZ;
934 }
935 if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
936 gCPU.fpscr |= FPSCR_VXIDI;
937 }
938 if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
939 // FIXME::
940 gCPU.fpscr |= FPSCR_VXIDI;
941 }
942 ppc_fpu_div(D, A, B);
943 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
944 if (gCPU.current_opc & PPC_OPC_Rc) {
945 // update cr1 flags
946 PPC_FPU_ERR("fdivs.\n");
947 }
948 }
949 /*
950 * fmaddx Floating Multiply-Add (Double-Precision)
951 * .496
952 */
953 void ppc_opc_fmaddx()
954 {
955 int frD, frA, frB, frC;
956 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
957 ppc_double A, B, C, D;
958 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
959 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
960 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
961 ppc_fpu_mul_add(D, A, C, B);
962 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
963 if (gCPU.current_opc & PPC_OPC_Rc) {
964 // update cr1 flags
965 PPC_FPU_ERR("fmadd.\n");
966 }
967 }
968 /*
969 * fmaddsx Floating Multiply-Add Single
970 * .497
971 */
972 void ppc_opc_fmaddsx()
973 {
974 int frD, frA, frB, frC;
975 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
976 ppc_double A, B, C, D;
977 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
978 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
979 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
980 ppc_fpu_mul_add(D, A, C, B);
981 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
982 if (gCPU.current_opc & PPC_OPC_Rc) {
983 // update cr1 flags
984 PPC_FPU_ERR("fmadds.\n");
985 }
986 }
987 /*
988 * fmrx Floating Move Register
989 * .498
990 */
991 void ppc_opc_fmrx()
992 {
993 int frD, rA, frB;
994 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB);
995 PPC_OPC_ASSERT(rA==0);
996 gCPU.fpr[frD] = gCPU.fpr[frB];
997 if (gCPU.current_opc & PPC_OPC_Rc) {
998 // update cr1 flags
999 PPC_FPU_ERR("fmr.\n");
1000 }
1001 }
1002 /*
1003 * fmsubx Floating Multiply-Subtract (Double-Precision)
1004 * .499
1005 */
1006 void ppc_opc_fmsubx()
1007 {
1008 int frD, frA, frB, frC;
1009 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1010 ppc_double A, B, C, D;
1011 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1012 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1013 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1014 B.s ^= 1;
1015 ppc_fpu_mul_add(D, A, C, B);
1016 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1017 if (gCPU.current_opc & PPC_OPC_Rc) {
1018 // update cr1 flags
1019 PPC_FPU_ERR("fmsub.\n");
1020 }
1021 }
1022 /*
1023 * fmsubsx Floating Multiply-Subtract Single
1024 * .500
1025 */
1026 void ppc_opc_fmsubsx()
1027 {
1028 int frD, frA, frB, frC;
1029 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1030 ppc_double A, B, C, D;
1031 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1032 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1033 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1034 ppc_fpu_mul_add(D, A, C, B);
1035 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1036 if (gCPU.current_opc & PPC_OPC_Rc) {
1037 // update cr1 flags
1038 PPC_FPU_ERR("fmsubs.\n");
1039 }
1040 }
1041 /*
1042 * fmulx Floating Multiply (Double-Precision)
1043 * .501
1044 */
1045 void ppc_opc_fmulx()
1046 {
1047 int frD, frA, frB, frC;
1048 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1049 PPC_OPC_ASSERT(frB==0);
1050 ppc_double A, C, D;
1051 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1052 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1053 if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1054 || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1055 gCPU.fpscr |= FPSCR_VXIMZ;
1056 }
1057 ppc_fpu_mul(D, A, C);
1058 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1059 // *((double*)&gCPU.fpr[frD]) = *((double*)(&gCPU.fpr[frA]))*(*((double*)(&gCPU.fpr[frC])));
1060 if (gCPU.current_opc & PPC_OPC_Rc) {
1061 // update cr1 flags
1062 PPC_FPU_ERR("fmul.\n");
1063 }
1064 }
1065 /*
1066 * fmulsx Floating Multiply Single
1067 * .502
1068 */
1069 void ppc_opc_fmulsx()
1070 {
1071 int frD, frA, frB, frC;
1072 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1073 PPC_OPC_ASSERT(frB==0);
1074 ppc_double A, C, D;
1075 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1076 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1077 if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero)
1078 || (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) {
1079 gCPU.fpscr |= FPSCR_VXIMZ;
1080 }
1081 ppc_fpu_mul(D, A, C);
1082 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1083 if (gCPU.current_opc & PPC_OPC_Rc) {
1084 // update cr1 flags
1085 PPC_FPU_ERR("fmuls.\n");
1086 }
1087 }
1088 /*
1089 * fnabsx Floating Negative Absolute Value
1090 * .503
1091 */
1092 void ppc_opc_fnabsx()
1093 {
1094 int frD, frA, frB;
1095 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1096 PPC_OPC_ASSERT(frA==0);
1097 gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT;
1098 if (gCPU.current_opc & PPC_OPC_Rc) {
1099 // update cr1 flags
1100 PPC_FPU_ERR("fnabs.\n");
1101 }
1102 }
1103 /*
1104 * fnegx Floating Negate
1105 * .504
1106 */
1107 void ppc_opc_fnegx()
1108 {
1109 int frD, frA, frB;
1110 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1111 PPC_OPC_ASSERT(frA==0);
1112 gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT;
1113 if (gCPU.current_opc & PPC_OPC_Rc) {
1114 // update cr1 flags
1115 PPC_FPU_ERR("fneg.\n");
1116 }
1117 }
1118 /*
1119 * fnmaddx Floating Negative Multiply-Add (Double-Precision)
1120 * .505
1121 */
1122 void ppc_opc_fnmaddx()
1123 {
1124 int frD, frA, frB, frC;
1125 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1126 ppc_double A, B, C, D/*, E*/;
1127 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1128 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1129 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1130 ppc_fpu_mul_add(D, A, C, B);
1131 D.s ^= 1;
1132 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1133 if (gCPU.current_opc & PPC_OPC_Rc) {
1134 // update cr1 flags
1135 PPC_FPU_ERR("fnmadd.\n");
1136 }
1137 }
1138 /*
1139 * fnmaddsx Floating Negative Multiply-Add Single
1140 * .506
1141 */
1142 void ppc_opc_fnmaddsx()
1143 {
1144 int frD, frA, frB, frC;
1145 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1146 ppc_double A, B, C, D;
1147 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1148 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1149 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1150 ppc_fpu_mul_add(D, A, C, B);
1151 D.s ^= 1;
1152 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1153 if (gCPU.current_opc & PPC_OPC_Rc) {
1154 // update cr1 flags
1155 PPC_FPU_ERR("fnmadds.\n");
1156 }
1157 }
1158 /*
1159 * fnmsubx Floating Negative Multiply-Subtract (Double-Precision)
1160 * .507
1161 */
1162 void ppc_opc_fnmsubx()
1163 {
1164 int frD, frA, frB, frC;
1165 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1166 ppc_double A, B, C, D;
1167 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1168 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1169 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1170 B.s ^= 1;
1171 ppc_fpu_mul_add(D, A, C, B);
1172 D.s ^= 1;
1173 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1174 if (gCPU.current_opc & PPC_OPC_Rc) {
1175 // update cr1 flags
1176 PPC_FPU_ERR("fnmsub.\n");
1177 }
1178 }
1179 /*
1180 * fnsubsx Floating Negative Multiply-Subtract Single
1181 * .508
1182 */
1183 void ppc_opc_fnmsubsx()
1184 {
1185 int frD, frA, frB, frC;
1186 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1187 ppc_double A, B, C, D;
1188 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1189 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1190 ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
1191 B.s ^= 1;
1192 ppc_fpu_mul_add(D, A, C, B);
1193 D.s ^= 1;
1194 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1195 if (gCPU.current_opc & PPC_OPC_Rc) {
1196 // update cr1 flags
1197 PPC_FPU_ERR("fnmsubs.\n");
1198 }
1199 }
1200 /*
1201 * fresx Floating Reciprocal Estimate Single
1202 * .509
1203 */
1204 void ppc_opc_fresx()
1205 {
1206 int frD, frA, frB, frC;
1207 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1208 PPC_OPC_ASSERT(frA==0 && frC==0);
1209 if (gCPU.current_opc & PPC_OPC_Rc) {
1210 // update cr1 flags
1211 PPC_FPU_ERR("fres.\n");
1212 }
1213 PPC_FPU_ERR("fres\n");
1214 }
1215 /*
1216 * frspx Floating Round to Single
1217 * .511
1218 */
1219 void ppc_opc_frspx()
1220 {
1221 int frD, frA, frB;
1222 PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
1223 PPC_OPC_ASSERT(frA==0);
1224 ppc_double B;
1225 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1226 gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]);
1227 if (gCPU.current_opc & PPC_OPC_Rc) {
1228 // update cr1 flags
1229 PPC_FPU_ERR("frsp.\n");
1230 }
1231 }
1232 /*
1233 * frsqrtex Floating Reciprocal Square Root Estimate
1234 * .512
1235 */
1236 void ppc_opc_frsqrtex()
1237 {
1238 int frD, frA, frB, frC;
1239 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1240 PPC_OPC_ASSERT(frA==0 && frC==0);
1241 ppc_double B;
1242 ppc_double D;
1243 ppc_double E;
1244 ppc_double Q;
1245 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1246 ppc_fpu_sqrt(Q, B);
1247 E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL;
1248 ppc_fpu_div(D, E, Q);
1249 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1250 if (gCPU.current_opc & PPC_OPC_Rc) {
1251 // update cr1 flags
1252 PPC_FPU_ERR("frsqrte.\n");
1253 }
1254 }
1255 /*
1256 * fselx Floating Select
1257 * .514
1258 */
1259 void ppc_opc_fselx()
1260 {
1261 int frD, frA, frB, frC;
1262 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1263 ppc_double A;
1264 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1265 if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) {
1266 gCPU.fpr[frD] = gCPU.fpr[frB];
1267 } else {
1268 gCPU.fpr[frD] = gCPU.fpr[frC];
1269 }
1270 if (gCPU.current_opc & PPC_OPC_Rc) {
1271 // update cr1 flags
1272 PPC_FPU_ERR("fsel.\n");
1273 }
1274 }
1275 /*
1276 * fsqrtx Floating Square Root (Double-Precision)
1277 * .515
1278 */
1279 void ppc_opc_fsqrtx()
1280 {
1281 int frD, frA, frB, frC;
1282 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1283 PPC_OPC_ASSERT(frA==0 && frC==0);
1284 ppc_double B;
1285 ppc_double D;
1286 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1287 ppc_fpu_sqrt(D, B);
1288 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1289 if (gCPU.current_opc & PPC_OPC_Rc) {
1290 // update cr1 flags
1291 PPC_FPU_ERR("fsqrt.\n");
1292 }
1293 }
1294 /*
1295 * fsqrtsx Floating Square Root Single
1296 * .515
1297 */
1298 void ppc_opc_fsqrtsx()
1299 {
1300 int frD, frA, frB, frC;
1301 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1302 PPC_OPC_ASSERT(frA==0 && frC==0);
1303 if (gCPU.current_opc & PPC_OPC_Rc) {
1304 // update cr1 flags
1305 PPC_FPU_ERR("fsqrts.\n");
1306 }
1307 PPC_FPU_ERR("fsqrts\n");
1308 }
1309 /*
1310 * fsubx Floating Subtract (Double-Precision)
1311 * .517
1312 */
1313 void ppc_opc_fsubx()
1314 {
1315 int frD, frA, frB, frC;
1316 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1317 PPC_OPC_ASSERT(frC==0);
1318 ppc_double A, B, D;
1319 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1320 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1321 if (B.type != ppc_fpr_NaN) {
1322 B.s ^= 1;
1323 }
1324 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1325 gCPU.fpscr |= FPSCR_VXISI;
1326 }
1327 ppc_fpu_add(D, A, B);
1328 gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
1329 if (gCPU.current_opc & PPC_OPC_Rc) {
1330 // update cr1 flags
1331 PPC_FPU_ERR("fsub.\n");
1332 }
1333 }
1334 /*
1335 * fsubsx Floating Subtract Single
1336 * .518
1337 */
1338 void ppc_opc_fsubsx()
1339 {
1340 int frD, frA, frB, frC;
1341 PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
1342 PPC_OPC_ASSERT(frC==0);
1343 ppc_double A, B, D;
1344 ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
1345 ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
1346 if (B.type != ppc_fpr_NaN) {
1347 B.s ^= 1;
1348 }
1349 if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
1350 gCPU.fpscr |= FPSCR_VXISI;
1351 }
1352 ppc_fpu_add(D, A, B);
1353 gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
1354 if (gCPU.current_opc & PPC_OPC_Rc) {
1355 // update cr1 flags
1356 PPC_FPU_ERR("fsubs.\n");
1357 }
1358 }
1359

  ViewVC Help
Powered by ViewVC 1.1.26