1 |
/* |
2 |
* PearPC |
3 |
* ppc_fpu.cc |
4 |
* |
5 |
* Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net) |
6 |
* Copyright (C) 2003 Stefan Weyergraf |
7 |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License version 2 as |
10 |
* published by the Free Software Foundation. |
11 |
* |
12 |
* This program is distributed in the hope that it will be useful, |
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
* GNU General Public License for more details. |
16 |
* |
17 |
* You should have received a copy of the GNU General Public License |
18 |
* along with this program; if not, write to the Free Software |
19 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 |
*/ |
21 |
|
22 |
#include "debug/tracers.h" |
23 |
#include "ppc_cpu.h" |
24 |
#include "ppc_dec.h" |
25 |
#include "ppc_fpu.h" |
26 |
|
27 |
// .121 |
28 |
|
29 |
|
30 |
#define PPC_FPR_TYPE2(a,b) (((a)<<8)|(b))

// Add two unpacked floating point values: res := a + b.
// a and b carry 56-bit mantissas (leading one at bit 55 for normalized
// values); both operands may be modified in place while their exponents
// are aligned.
inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
{
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		// align the operand with the smaller exponent
		int diff = a.e - b.e;
		if (diff<0) {
			diff = -diff;
			if (diff <= 56) {
				a.m >>= diff;
			} else if (a.m != 0) {
				// operands too far apart: keep only a sticky 1
				a.m = 1;
			} else {
				a.m = 0;
			}
			res.e = b.e;
		} else {
			if (diff <= 56) {
				b.m >>= diff;
			} else if (b.m != 0) {
				// operands too far apart: keep only a sticky 1
				b.m = 1;
			} else {
				b.m = 0;
			}
			res.e = a.e;
		}
		res.type = ppc_fpr_norm;
		if (a.s == b.s) {
			// same sign: magnitudes add; renormalize on carry into bit 56
			res.s = a.s;
			res.m = a.m + b.m;
			if (res.m & (1ULL<<56)) {
				res.m >>= 1;
				res.e++;
			}
		} else {
			// opposite signs: effectively a subtraction
			res.s = a.s;
			res.m = a.m - b.m;
			if (!res.m) {
				// exact cancellation: the sign of the resulting zero
				// depends on the rounding mode (negative only when
				// rounding towards -oo, or when both inputs are negative)
				if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
					res.s |= b.s;
				} else {
					res.s &= b.s;
				}
				res.type = ppc_fpr_zero;
			} else {
				if ((sint64)res.m < 0) {
					// |b| was larger: redo the subtraction the other
					// way round and take b's sign
					res.m = b.m - a.m;
					res.s = b.s;
				}
				// renormalize so the leading one sits at bit 55 again
				diff = ppc_fpu_normalize(res) - 8;
				res.e -= diff;
				res.m <<= diff;
			}
		}
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.s = a.s;
		res.type = ppc_fpr_NaN;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		res.e = a.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		// a dominates the result
		res.s = a.s;
		res.m = a.m;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
		res.e = b.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		// b dominates the result
		res.s = b.s;
		res.m = b.m;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
		if (a.s != b.s) {
			// +oo + -oo == NaN
			res.s = a.s ^ b.s;
			res.type = ppc_fpr_NaN;
			break;
		}
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
		res.s = a.s;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
		res.s = b.s;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		// round bla
		// -0 + -0 = -0, any other combination gives +0
		res.type = ppc_fpr_zero;
		res.s = a.s && b.s;
		break;
	}
}
135 |
|
136 |
// Shift the 128 bit mantissa m0:m1 right by 'exp' bits.
// Fix: when 'exp' is 0 or a multiple of 64, the old code evaluated
// t << (64-exp) with a shift count of 64, which is undefined behavior
// for a 64 bit type (C/C++ shift counts must be < the operand width).
// The intended result of that case is a no-op on the cross-word
// transfer, so we simply guard it. exp >= 128 now clears the mantissa
// instead of shifting by an out-of-range count.
inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
{
	if (exp >= 128) {
		// everything is shifted out
		q.m0 = 0;
		q.m1 = 0;
		return;
	}
	if (exp >= 64) {
		q.m1 = q.m0;
		q.m0 = 0;
		exp -= 64;
	}
	if (!exp) return;	// avoid the undefined 64 bit shift below
	uint64 t = q.m0 & ((1ULL<<exp)-1);
	q.m0 >>= exp;
	q.m1 >>= exp;
	q.m1 |= t<<(64-exp);
}
148 |
|
149 |
// Shift the 128 bit mantissa m0:m1 left by 'exp' bits.
// Fix: when 'exp' is 0 or a multiple of 64, the old code evaluated
// q.m1 >> (64-exp) with a shift count of 64, which is undefined
// behavior for a 64 bit type. The intended result of that case is a
// no-op on the cross-word transfer, so we guard it. exp >= 128 now
// clears the mantissa instead of shifting by an out-of-range count.
inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
{
	if (exp >= 128) {
		// everything is shifted out
		q.m0 = 0;
		q.m1 = 0;
		return;
	}
	if (exp >= 64) {
		q.m0 = q.m1;
		q.m1 = 0;
		exp -= 64;
	}
	if (!exp) return;	// avoid the undefined 64 bit shift below
	uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
	q.m0 <<= exp;
	q.m1 <<= exp;
	q.m0 |= t;
}
161 |
|
162 |
// 128 bit mantissa addition: res.m0:m1 = a.m0:m1 + b.m0:m1,
// propagating the carry from the low into the high word.
inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
{
	uint64 low = a.m1 + b.m1;
	// unsigned addition wrapped around iff the sum is below an operand
	uint64 carry = (low < a.m1) ? 1 : 0;
	res.m1 = low;
	res.m0 = a.m0 + b.m0 + carry;
}
171 |
|
172 |
// 128 bit mantissa subtraction: res.m0:m1 = a.m0:m1 - b.m0:m1,
// propagating the borrow from the low into the high word.
inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
{
	// the low word underflows iff a.m1 < b.m1
	uint64 borrow = (a.m1 < b.m1) ? 1 : 0;
	res.m1 = a.m1 - b.m1;
	res.m0 = a.m0 - b.m0 - borrow;
}
181 |
|
182 |
// res has 107 significant bits. a, b have 106 significant bits each.
// Quad-precision variant of ppc_fpu_add, used by the fused
// multiply-add path; the lowest bit of m1 acts as the sticky/round
// bit X' and is preserved across renormalizing shifts.
inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b)
{
	// treat as 107 bit mantissa
	if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1);
	if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1);
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		// align the operand with the smaller exponent
		int diff = a.e - b.e;
		if (diff < 0) {
			diff = -diff;
			if (diff <= 107) {
				// FIXME: may set x_prime
				ppc_fpu_quadro_mshr(a, diff);
			} else if (a.m0 || a.m1) {
				// operands too far apart: keep only a sticky 1
				a.m0 = 0;
				a.m1 = 1;
			} else {
				a.m0 = 0;
				a.m1 = 0;
			}
			res.e = b.e;
		} else {
			if (diff <= 107) {
				// FIXME: may set x_prime
				ppc_fpu_quadro_mshr(b, diff);
			} else if (b.m0 || b.m1) {
				// operands too far apart: keep only a sticky 1
				b.m0 = 0;
				b.m1 = 1;
			} else {
				b.m0 = 0;
				b.m1 = 0;
			}
			res.e = a.e;
		}
		res.type = ppc_fpr_norm;
		if (a.s == b.s) {
			// same sign: magnitudes add
			res.s = a.s;
			ppc_fpu_add_quadro_m(res, a, b);
			// preserve the sticky X' bit across the renormalizing shift
			int X_prime = res.m1 & 1;
			if (res.m0 & (1ULL<<(107-64))) {
				ppc_fpu_quadro_mshr(res, 1);
				res.e++;
			}
			// res = [107]
			res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime;
		} else {
			// opposite signs: subtract the smaller magnitude from the
			// larger; first compare the 128 bit mantissas
			res.s = a.s;
			int cmp;
			if (a.m0 < b.m0) {
				cmp = -1;
			} else if (a.m0 > b.m0) {
				cmp = +1;
			} else {
				if (a.m1 < b.m1) {
					cmp = -1;
				} else if (a.m1 > b.m1) {
					cmp = +1;
				} else {
					cmp = 0;
				}
			}
			if (!cmp) {
				// exact cancellation: the sign of the resulting zero
				// depends on the rounding mode
				if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
					res.s |= b.s;
				} else {
					res.s &= b.s;
				}
				res.type = ppc_fpr_zero;
			} else {
				if (cmp < 0) {
					ppc_fpu_sub_quadro_m(res, b, a);
					res.s = b.s;
				} else {
					ppc_fpu_sub_quadro_m(res, a, b);
				}
				// renormalize to 107 bits, keeping X' out of the shift
				diff = ppc_fpu_normalize_quadro(res) - (128-107);
				int X_prime = res.m1 & 1;
				res.m1 &= 0xfffffffffffffffeULL;
				ppc_fpu_quadro_mshl(res, diff);
				res.e -= diff;
				res.m1 |= X_prime;
			}
			// res = [107]
		}
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.s = a.s;
		res.type = ppc_fpr_NaN;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		// a dominates the result
		res.e = a.e;
		res.s = a.s;
		res.m0 = a.m0;
		res.m1 = a.m1;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		// b dominates the result
		res.e = b.e;
		res.s = b.s;
		res.m0 = b.m0;
		res.m1 = b.m1;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
		if (a.s != b.s) {
			// +oo + -oo == NaN
			res.s = a.s ^ b.s;
			res.type = ppc_fpr_NaN;
			break;
		}
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
		res.s = a.s;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
		res.s = b.s;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		// round bla
		// -0 + -0 = -0, any other combination gives +0
		res.type = ppc_fpr_zero;
		res.s = a.s && b.s;
		break;
	}
}
318 |
|
319 |
// a += b; 'carry' receives 1 if the 64 bit addition wrapped, else 0.
inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
{
	uint64 sum = a + b;
	carry = (sum < a) ? 1 : 0;
	a = sum;
}
324 |
|
325 |
// 'res' has 56 significant bits on return, a + b have 56 significant bits each
// Multiply two unpacked values: res := a * b. The 56x56 bit product is
// built from four 32 bit limb products, then truncated to 56 bits.
inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b)
{
	res.s = a.s ^ b.s;
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		res.type = ppc_fpr_norm;
		res.e = a.e + b.e;
//		printf("new exp: %d\n", res.e);
//		ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m);
		// four partial products of the 32 bit halves
		uint64 fH, fM1, fM2, fL;
		fL = (a.m & 0xffffffff) * (b.m & 0xffffffff);	// [32] * [32] = [63,64]
		fM1 = (a.m >> 32) * (b.m & 0xffffffff);	// [24] * [32] = [55,56]
		fM2 = (a.m & 0xffffffff) * (b.m >> 32);	// [32] * [24] = [55,56]
		fH = (a.m >> 32) * (b.m >> 32);	// [24] * [24] = [47,48]
//		ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);

		// calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
		uint64 rL, rH;
		rL = fL;	// rL = rH = [63,64]
		rH = fH;	// rH = fH = [47,48]
		uint64 split;
		split = fM1 + fM2;
		uint64 carry;
		ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry);	// rL = [63,64]
		rH += carry;	// rH = [0 .. 2^48]
		rH += split >> 32;	// rH = [0:48], where 46, 47 or 48 set

		// res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55]
		//         [---------------------------------------------------------]
		// bit   = [63 62 .. 58 | 57 56      ..    9 | 8 7        ..       0 ]
		//         [---------------------------------------------------------]
		//         [15 bits zero | 49 bits rH        | 8 most sign. bits rL  ]
		res.m = rH << 9;
		res.m |= rL >> (64-9);
		// res.m = [58]

//		ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL);
		// the leading one of the product is in bit 55, 56 or 57:
		// shift down until it sits at bit 55 again
		if (res.m & (1ULL << 57)) {
			res.m >>= 2;
			res.e += 2;
		} else if (res.m & (1ULL << 56)) {
			res.m >>= 1;
			res.e++;
		}
		// res.m = [56]
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.type = a.type;
		res.e = a.e;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		res.s = a.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		res.s = b.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
		// 0 * oo is an invalid operation (caller records VXIMZ)
		res.type = ppc_fpr_NaN;
		break;
	}
}
403 |
|
404 |
// 'res' has 'prec' significant bits on return, a + b have 56 significant bits each
// for 111 >= prec >= 64
// Quad-precision multiply: keeps the full 112 bit product in res.m0:m1
// and then shifts it down to exactly 'prec' significant bits.
inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec)
{
	res.s = a.s ^ b.s;
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		res.type = ppc_fpr_norm;
		res.e = a.e + b.e;
//		printf("new exp: %d\n", res.e);
//		ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m);
		// four partial products of the 32 bit halves
		uint64 fH, fM1, fM2, fL;
		fL = (a.m & 0xffffffff) * (b.m & 0xffffffff);	// [32] * [32] = [63,64]
		fM1 = (a.m >> 32) * (b.m & 0xffffffff);	// [24] * [32] = [55,56]
		fM2 = (a.m & 0xffffffff) * (b.m >> 32);	// [32] * [24] = [55,56]
		fH = (a.m >> 32) * (b.m >> 32);	// [24] * [24] = [47,48]
//		ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL);

		// calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL
		uint64 rL, rH;
		rL = fL;	// rL = rH = [63,64]
		rH = fH;	// rH = fH = [47,48]
		uint64 split;
		split = fM1 + fM2;
		uint64 carry;
		ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry);	// rL = [63,64]
		rH += carry;	// rH = [0 .. 2^48]
		rH += split >> 32;	// rH = [0:48], where 46, 47 or 48 set

		// res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0]
		//          [-----------------------------------------------------------]
		// log.bit= [127 126 .. 113 | 112      ..   64 | 63 62      ..        0 ]
		//          [-----------------------------------------------------------]
		//          [ 15 bits zero  | 49 bits rH       | 64 bits rL             ]
		res.m0 = rH;
		res.m1 = rL;
		// res.m0|res.m1 = [111,112,113]

//		ht_printf("res = %016qx%016qx\n", res.m0, res.m1);
		// normalize down to exactly 'prec' significant bits, depending
		// on where the leading one of the product ended up
		if (res.m0 & (1ULL << 48)) {
			ppc_fpu_quadro_mshr(res, 2+(111-prec));
			res.e += 2;
		} else if (res.m0 & (1ULL << 47)) {
			ppc_fpu_quadro_mshr(res, 1+(111-prec));
			res.e += 1;
		} else {
			ppc_fpu_quadro_mshr(res, 111-prec);
		}
		// res.m0|res.m1 = [prec]
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.type = a.type;
		res.e = a.e;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		res.s = a.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		res.s = b.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
		// 0 * oo is an invalid operation
		res.type = ppc_fpr_NaN;
		break;
	}
}
485 |
|
486 |
// calculate one of these:
//	+ m1 * m2 + s
//	+ m1 * m2 - s
//	- m1 * m2 + s
//	- m1 * m2 - s
// using a 106 bit accumulator (fused multiply-add: the product is not
// rounded before the addition)
//
// .752
//
// FIXME: There is a bug in this code that shows up in Mac OS X Finder fwd/bwd
// button: the top line is not rendered correctly. This works with the jitc_x86
// FPU however...
inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2,
	ppc_double &s)
{
	ppc_quadro p;
/*	ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e,
		ppc_fpu_get_fpr_type(m1.type));
	ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e,
		ppc_fpu_get_fpr_type(m2.type));*/
	// create product with 106 significant bits
	ppc_fpu_mul_quadro(p, m1, m2, 106);
/*	ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e,
		ppc_fpu_get_fpr_type(p.type));*/
	// convert s into ppc_quadro
/*	ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e,
		ppc_fpu_get_fpr_type(s.type));*/
	ppc_quadro q;
	q.e = s.e;
	q.s = s.s;
	q.type = s.type;
	q.m0 = 0;
	q.m1 = s.m;
	// .. with 106 significant bits
	ppc_fpu_quadro_mshl(q, 106-56);
/*	ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e,
		ppc_fpu_get_fpr_type(q.type));*/
	// now we must add p, q.
	ppc_quadro x;
	ppc_fpu_add_quadro(x, p, q);
	// x = [107]
/*	ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e,
		ppc_fpu_get_fpr_type(x.type));*/
	// compress the 107 bit sum back into a 56 bit mantissa, keeping
	// the lowest bit of x.m1 as the sticky X' bit
	res.type = x.type;
	res.s = x.s;
	res.e = x.e;
	if (x.type == ppc_fpr_norm) {
		res.m = x.m0 << 13;	// 43 bits from m0
		res.m |= (x.m1 >> (64-12)) << 1;	// 12 bits from m1
		res.m |= x.m1 & 1;	// X' bit from m1
	}
/*	ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e,
		ppc_fpu_get_fpr_type(res.type));*/
}
540 |
|
541 |
// Divide two unpacked values: res := a / b.
// 'res' has 56 significant bits on return; a and b have 56 each.
inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
{
	res.s = a.s ^ b.s;
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		res.type = ppc_fpr_norm;
		res.e = a.e - b.e;
		res.m = 0;
		uint64 am = a.m, bm = b.m;
		uint i = 0;
		// classic restoring long division: one quotient bit per step,
		// terminating early once the remainder becomes zero
		while (am && (i<56)) {
			res.m <<= 1;
			if (am >= bm) {
				res.m |= 1;
				am -= bm;
			}
			am <<= 1;
//			printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
			i++;
		}
		// left-justify the quotient, then renormalize so the leading
		// one sits at bit 55
		res.m <<= 57-i;
		if (res.m & (1ULL << 56)) {
			res.m >>= 1;
		} else {
			res.e--;
		}
//		printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m);
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.e = a.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		res.s = a.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		res.s = b.s;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
		// x / oo -> 0
		res.type = ppc_fpr_zero;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		// x / 0 -> oo (the caller records the exception flags)
		res.type = ppc_fpr_Inf;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		res.type = ppc_fpr_NaN;
		break;
	}
}
602 |
|
603 |
// Square root approximation via Heron's (Newton's) iteration:
//   D := 1/2 * (D_old + B/D_old)
// seeded with D = B (exponent halved) and run for 6 iterations.
// Negative normalized input and -oo set FPSCR[VXSQRT] and yield a NaN.
inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
{
	switch (B.type) {
	case ppc_fpr_norm:
		if (B.s) {
			// square root of a negative number is invalid
			D.type = ppc_fpr_NaN;
			gCPU.fpscr |= FPSCR_VXSQRT;
			break;
		}
		// D := 1/2(D_old + B/D_old)
		D = B;
		D.e /= 2;	// initial guess: halve the (binary) exponent
		for (int i=0; i<6; i++) {
			ppc_double D_old = D;
			ppc_double B_div_D_old;
			ppc_fpu_div(B_div_D_old, B, D_old);
			ppc_fpu_add(D, D_old, B_div_D_old);
			D.e--;	// the "1/2 *" part of the iteration

/*			uint64 e;
			ppc_double E = D;
			ppc_fpu_pack_double(E, e);
			printf("%.20f\n", *(double *)&e);*/
		}
		break;
	case ppc_fpr_zero:
		// sqrt(+-0) = +-0
		D.type = ppc_fpr_zero;
		D.s = B.s;
		break;
	case ppc_fpr_Inf:
		if (B.s) {
			// sqrt(-oo) is invalid
			D.type = ppc_fpr_NaN;
			gCPU.fpscr |= FPSCR_VXSQRT;
		} else {
			// sqrt(+oo) = +oo
			D.type = ppc_fpr_Inf;
			D.s = 0;
		}
		break;
	case ppc_fpr_NaN:
		D.type = ppc_fpr_NaN;
		break;
	}
}
646 |
|
647 |
// Ad-hoc manual test driver for the unpack/add/pack round trip.
// Prints intermediate values to stdout; not part of the emulation path.
void ppc_fpu_test()
{
	ppc_double A, B, C;
	double a, b, c;
	A.type = B.type = ppc_fpr_norm;
	A.s = 1;
	A.e = 0;
	A.m = 0;
	A.m = ((1ULL<<56)-1)-((1ULL<<10)-1);	// 56 bit mantissa, low 10 bits clear
	ht_printf("%qb\n", A.m);
	B.s = 1;
	B.e = 0;
	B.m = 0;
	B.m = ((1ULL<<56)-1)-((1ULL<<50)-1);	// 56 bit mantissa, low 50 bits clear
	a = ppc_fpu_get_double(A);
	b = ppc_fpu_get_double(B);
	printf("%f + %f = \n", a, b);
	ppc_fpu_add(C, A, B);
	uint64 d;
	uint32 s;
	// round-trip through the double-as-single and single formats
	ppc_fpu_pack_double_as_single(C, d);
	ht_printf("%064qb\n", d);
	ppc_fpu_unpack_double(C, d);
	ppc_fpu_pack_single(C, s);
	ht_printf("single: %032b\n", s);
	ppc_single Cs;
	ppc_fpu_unpack_single(Cs, s);
	ppc_fpu_single_to_double(Cs, C);
//	ht_printf("%d\n", ppc_fpu_double_to_int(C));
	c = ppc_fpu_get_double(C);
	printf("%f\n", c);
}
679 |
|
680 |
/* |
681 |
* a and b must not be NaNs |
682 |
*/ |
683 |
inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b) |
684 |
{ |
685 |
if (a.type == ppc_fpr_zero) { |
686 |
if (b.type == ppc_fpr_zero) return 2; |
687 |
return (b.s) ? 4: 8; |
688 |
} |
689 |
if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4; |
690 |
if (a.s != b.s) return (a.s) ? 8: 4; |
691 |
if (a.e > b.e) return (a.s) ? 8: 4; |
692 |
if (a.e < b.e) return (a.s) ? 4: 8; |
693 |
if (a.m > b.m) return (a.s) ? 8: 4; |
694 |
if (a.m < b.m) return (a.s) ? 4: 8; |
695 |
return 2; |
696 |
} |
697 |
|
698 |
// Convenience overload: unpack a raw 64 bit double image, then convert.
double ppc_fpu_get_double(uint64 d)
{
	ppc_double tmp;
	ppc_fpu_unpack_double(tmp, d);
	return ppc_fpu_get_double(tmp);
}
704 |
|
705 |
// Convert an unpacked value into a host double (debugging helper).
// Only normalized numbers are converted; every other type yields 0.0.
double ppc_fpu_get_double(ppc_double &d)
{
	if (d.type != ppc_fpr_norm) {
		return 0.0;
	}
	// the mantissa carries its leading bit at position 55,
	// so scale it down by 2^55 first
	double r = d.m;
	for (int i=0; i<55; i++) {
		r = r / 2.0;
	}
	// then apply the binary exponent
	for (int i=0; i>d.e; i--) {
		r = r / 2.0;
	}
	for (int i=0; i<d.e; i++) {
		r = r * 2.0;
	}
	return d.s ? -r : r;
}
727 |
|
728 |
/*********************************************************************************** |
729 |
* |
730 |
*/ |
731 |
|
732 |
|
733 |
/* |
734 |
* fabsx Floating Absolute Value |
735 |
* .484 |
736 |
*/ |
737 |
void ppc_opc_fabsx() |
738 |
{ |
739 |
int frD, frA, frB; |
740 |
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
741 |
PPC_OPC_ASSERT(frA==0); |
742 |
gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT; |
743 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
744 |
// update cr1 flags |
745 |
PPC_FPU_ERR("fabs.\n"); |
746 |
} |
747 |
} |
748 |
/*
 *	faddx		Floating Add (Double-Precision)
 *	.485
 */
void ppc_opc_faddx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);
	ppc_double A, B, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	// oo + -oo is an invalid operation: record it in the FPSCR
	if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
		gCPU.fpscr |= FPSCR_VXISI;
	}
	ppc_fpu_add(D, A, B);
	gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fadd.\n");
	}
}
770 |
/*
 *	faddsx		Floating Add Single
 *	.486
 */
void ppc_opc_faddsx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);
	ppc_double A, B, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	// oo + -oo is an invalid operation: record it in the FPSCR
	if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
		gCPU.fpscr |= FPSCR_VXISI;
	}
	ppc_fpu_add(D, A, B);
	// single-precision form: round the result to single when packing
	gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fadds.\n");
	}
}
792 |
/*
 *	fcmpo		Floating Compare Ordered
 *	.488
 */
// AND masks that clear the 4 bit CR field at nibble index 0..7
// (indexed with 7-crfD, since CR field 0 is the most significant nibble)
static uint32 ppc_fpu_cmp_and_mask[8] = {
	0xfffffff0,
	0xffffff0f,
	0xfffff0ff,
	0xffff0fff,
	0xfff0ffff,
	0xff0fffff,
	0xf0ffffff,
	0x0fffffff,
};
void ppc_opc_fcmpo()
{
	int crfD, frA, frB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
	crfD >>= 2;
	ppc_double A, B;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	uint32 cmp;
	if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
		// NaN operands compare unordered (FPCC "FU" = 1)
		gCPU.fpscr |= FPSCR_VXSNAN;
		/*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC;
		cmp = 1;
	} else {
		cmp = ppc_fpu_compare(A, B);
	}
	crfD = 7-crfD;
	// mirror the verdict into FPSCR[FPCC] and the selected CR field
	gCPU.fpscr &= ~0x1f000;
	gCPU.fpscr |= (cmp << 12);
	gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
	gCPU.cr |= (cmp << (crfD * 4));
}
828 |
/*
 *	fcmpu		Floating Compare Unordered
 *	.489
 */
void ppc_opc_fcmpu()
{
	int crfD, frA, frB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB);
	crfD >>= 2;
	ppc_double A, B;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	uint32 cmp;
	if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) {
		// NaN operands compare unordered (FPCC "FU" = 1);
		// unlike fcmpo, no VXVC is raised here
		gCPU.fpscr |= FPSCR_VXSNAN;
		cmp = 1;
	} else {
		cmp = ppc_fpu_compare(A, B);
	}
	crfD = 7-crfD;
	// mirror the verdict into FPSCR[FPCC] and the selected CR field
	gCPU.fpscr &= ~0x1f000;
	gCPU.fpscr |= (cmp << 12);
	gCPU.cr &= ppc_fpu_cmp_and_mask[crfD];
	gCPU.cr |= (cmp << (crfD * 4));
}
853 |
/*
 *	fctiwx		Floating Convert to Integer Word
 *	.492
 */
void ppc_opc_fctiwx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
	PPC_OPC_ASSERT(frA==0);
	ppc_double B;
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	// convert with the rounding mode currently in effect
	// (cf. fctiwzx below, which forces round-toward-zero)
	gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fctiw.\n");
	}
}
870 |
/*
 *	fctiwzx		Floating Convert to Integer Word with Round toward Zero
 *	.493
 */
void ppc_opc_fctiwzx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB);
	PPC_OPC_ASSERT(frA==0);
	// temporarily force FPSCR[RN] to round-toward-zero (0b01)
	uint32 oldfpscr = gCPU.fpscr;
	gCPU.fpscr &= ~3;
	gCPU.fpscr |= 1;
	ppc_double B;
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	gCPU.fpr[frD] = ppc_fpu_double_to_int(B);
	// restore the caller's FPSCR (including its rounding mode)
	gCPU.fpscr = oldfpscr;
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fctiwz.\n");
	}
}
891 |
/*
 *	fdivx		Floating Divide (Double-Precision)
 *	.494
 */
void ppc_opc_fdivx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);
	ppc_double A, B, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	// 0 / 0 is invalid
	if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
		gCPU.fpscr |= FPSCR_VXZDZ;
	}
	// oo / oo is invalid
	if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
		gCPU.fpscr |= FPSCR_VXIDI;
	}
	if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
		// FIXME:: this flags divide-by-zero with VXIDI
		gCPU.fpscr |= FPSCR_VXIDI;
	}
	ppc_fpu_div(D, A, B);
	gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fdiv.\n");
	}
}
920 |
/*
 *	fdivsx		Floating Divide Single
 *	.495
 */
void ppc_opc_fdivsx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);
	ppc_double A, B, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	// 0 / 0 is invalid
	if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) {
		gCPU.fpscr |= FPSCR_VXZDZ;
	}
	// oo / oo is invalid
	if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) {
		gCPU.fpscr |= FPSCR_VXIDI;
	}
	if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) {
		// FIXME:: this flags divide-by-zero with VXIDI
		gCPU.fpscr |= FPSCR_VXIDI;
	}
	ppc_fpu_div(D, A, B);
	// single-precision form: round the result to single when packing
	gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fdivs.\n");
	}
}
949 |
/*
 *	fmaddx		Floating Multiply-Add (Double-Precision)
 *	.496
 */
void ppc_opc_fmaddx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	ppc_double A, B, C, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
	// D = frA * frC + frB (fused: no intermediate rounding of the product)
	ppc_fpu_mul_add(D, A, C, B);
	gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fmadd.\n");
	}
}
968 |
/*
 *	fmaddsx		Floating Multiply-Add Single
 *	.497
 */
void ppc_opc_fmaddsx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	ppc_double A, B, C, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
	// D = frA * frC + frB, then rounded to single when packing
	ppc_fpu_mul_add(D, A, C, B);
	gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fmadds.\n");
	}
}
987 |
/* |
988 |
* fmrx Floating Move Register |
989 |
* .498 |
990 |
*/ |
991 |
void ppc_opc_fmrx() |
992 |
{ |
993 |
int frD, rA, frB; |
994 |
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB); |
995 |
PPC_OPC_ASSERT(rA==0); |
996 |
gCPU.fpr[frD] = gCPU.fpr[frB]; |
997 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
998 |
// update cr1 flags |
999 |
PPC_FPU_ERR("fmr.\n"); |
1000 |
} |
1001 |
} |
1002 |
/*
 *	fmsubx		Floating Multiply-Subtract (Double-Precision)
 *	.499
 */
void ppc_opc_fmsubx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC);
	ppc_double A, B, C, D;
	ppc_fpu_unpack_double(A, gCPU.fpr[frA]);
	ppc_fpu_unpack_double(B, gCPU.fpr[frB]);
	ppc_fpu_unpack_double(C, gCPU.fpr[frC]);
	// D = frA * frC - frB: negate B and reuse the fused multiply-add
	B.s ^= 1;
	ppc_fpu_mul_add(D, A, C, B);
	gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]);
	if (gCPU.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		PPC_FPU_ERR("fmsub.\n");
	}
}
1022 |
/* |
1023 |
* fmsubsx Floating Multiply-Subtract Single |
1024 |
* .500 |
1025 |
*/ |
1026 |
void ppc_opc_fmsubsx() |
1027 |
{ |
1028 |
int frD, frA, frB, frC; |
1029 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1030 |
ppc_double A, B, C, D; |
1031 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1032 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1033 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1034 |
ppc_fpu_mul_add(D, A, C, B); |
1035 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1036 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1037 |
// update cr1 flags |
1038 |
PPC_FPU_ERR("fmsubs.\n"); |
1039 |
} |
1040 |
} |
1041 |
/* |
1042 |
 *	fmulx		Floating Multiply (Double-Precision)
1043 |
* .501 |
1044 |
*/ |
1045 |
void ppc_opc_fmulx() |
1046 |
{ |
1047 |
int frD, frA, frB, frC; |
1048 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1049 |
PPC_OPC_ASSERT(frB==0); |
1050 |
ppc_double A, C, D; |
1051 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1052 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1053 |
if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero) |
1054 |
|| (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) { |
1055 |
gCPU.fpscr |= FPSCR_VXIMZ; |
1056 |
} |
1057 |
ppc_fpu_mul(D, A, C); |
1058 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1059 |
// *((double*)&gCPU.fpr[frD]) = *((double*)(&gCPU.fpr[frA]))*(*((double*)(&gCPU.fpr[frC]))); |
1060 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1061 |
// update cr1 flags |
1062 |
PPC_FPU_ERR("fmul.\n"); |
1063 |
} |
1064 |
} |
1065 |
/* |
1066 |
 *	fmulsx		Floating Multiply Single
1067 |
* .502 |
1068 |
*/ |
1069 |
void ppc_opc_fmulsx() |
1070 |
{ |
1071 |
int frD, frA, frB, frC; |
1072 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1073 |
PPC_OPC_ASSERT(frB==0); |
1074 |
ppc_double A, C, D; |
1075 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1076 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1077 |
if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero) |
1078 |
|| (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) { |
1079 |
gCPU.fpscr |= FPSCR_VXIMZ; |
1080 |
} |
1081 |
ppc_fpu_mul(D, A, C); |
1082 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1083 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1084 |
// update cr1 flags |
1085 |
PPC_FPU_ERR("fmuls.\n"); |
1086 |
} |
1087 |
} |
1088 |
/* |
1089 |
* fnabsx Floating Negative Absolute Value |
1090 |
* .503 |
1091 |
*/ |
1092 |
void ppc_opc_fnabsx() |
1093 |
{ |
1094 |
int frD, frA, frB; |
1095 |
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1096 |
PPC_OPC_ASSERT(frA==0); |
1097 |
gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT; |
1098 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1099 |
// update cr1 flags |
1100 |
PPC_FPU_ERR("fnabs.\n"); |
1101 |
} |
1102 |
} |
1103 |
/* |
1104 |
* fnegx Floating Negate |
1105 |
* .504 |
1106 |
*/ |
1107 |
void ppc_opc_fnegx() |
1108 |
{ |
1109 |
int frD, frA, frB; |
1110 |
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1111 |
PPC_OPC_ASSERT(frA==0); |
1112 |
gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT; |
1113 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1114 |
// update cr1 flags |
1115 |
PPC_FPU_ERR("fneg.\n"); |
1116 |
} |
1117 |
} |
1118 |
/* |
1119 |
* fnmaddx Floating Negative Multiply-Add (Double-Precision) |
1120 |
* .505 |
1121 |
*/ |
1122 |
void ppc_opc_fnmaddx() |
1123 |
{ |
1124 |
int frD, frA, frB, frC; |
1125 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1126 |
ppc_double A, B, C, D/*, E*/; |
1127 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1128 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1129 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1130 |
ppc_fpu_mul_add(D, A, C, B); |
1131 |
D.s ^= 1; |
1132 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1133 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1134 |
// update cr1 flags |
1135 |
PPC_FPU_ERR("fnmadd.\n"); |
1136 |
} |
1137 |
} |
1138 |
/* |
1139 |
* fnmaddsx Floating Negative Multiply-Add Single |
1140 |
* .506 |
1141 |
*/ |
1142 |
void ppc_opc_fnmaddsx() |
1143 |
{ |
1144 |
int frD, frA, frB, frC; |
1145 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1146 |
ppc_double A, B, C, D; |
1147 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1148 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1149 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1150 |
ppc_fpu_mul_add(D, A, C, B); |
1151 |
D.s ^= 1; |
1152 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1153 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1154 |
// update cr1 flags |
1155 |
PPC_FPU_ERR("fnmadds.\n"); |
1156 |
} |
1157 |
} |
1158 |
/* |
1159 |
* fnmsubx Floating Negative Multiply-Subtract (Double-Precision) |
1160 |
* .507 |
1161 |
*/ |
1162 |
void ppc_opc_fnmsubx() |
1163 |
{ |
1164 |
int frD, frA, frB, frC; |
1165 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1166 |
ppc_double A, B, C, D; |
1167 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1168 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1169 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1170 |
B.s ^= 1; |
1171 |
ppc_fpu_mul_add(D, A, C, B); |
1172 |
D.s ^= 1; |
1173 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1174 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1175 |
// update cr1 flags |
1176 |
PPC_FPU_ERR("fnmsub.\n"); |
1177 |
} |
1178 |
} |
1179 |
/* |
1180 |
 *	fnmsubsx	Floating Negative Multiply-Subtract Single
1181 |
* .508 |
1182 |
*/ |
1183 |
void ppc_opc_fnmsubsx() |
1184 |
{ |
1185 |
int frD, frA, frB, frC; |
1186 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1187 |
ppc_double A, B, C, D; |
1188 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1189 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1190 |
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1191 |
B.s ^= 1; |
1192 |
ppc_fpu_mul_add(D, A, C, B); |
1193 |
D.s ^= 1; |
1194 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1195 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1196 |
// update cr1 flags |
1197 |
PPC_FPU_ERR("fnmsubs.\n"); |
1198 |
} |
1199 |
} |
1200 |
/* |
1201 |
* fresx Floating Reciprocal Estimate Single |
1202 |
* .509 |
1203 |
*/ |
1204 |
void ppc_opc_fresx() |
1205 |
{ |
1206 |
int frD, frA, frB, frC; |
1207 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1208 |
PPC_OPC_ASSERT(frA==0 && frC==0); |
1209 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1210 |
// update cr1 flags |
1211 |
PPC_FPU_ERR("fres.\n"); |
1212 |
} |
1213 |
PPC_FPU_ERR("fres\n"); |
1214 |
} |
1215 |
/* |
1216 |
* frspx Floating Round to Single |
1217 |
* .511 |
1218 |
*/ |
1219 |
void ppc_opc_frspx() |
1220 |
{ |
1221 |
int frD, frA, frB; |
1222 |
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1223 |
PPC_OPC_ASSERT(frA==0); |
1224 |
ppc_double B; |
1225 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1226 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]); |
1227 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1228 |
// update cr1 flags |
1229 |
PPC_FPU_ERR("frsp.\n"); |
1230 |
} |
1231 |
} |
1232 |
/* |
1233 |
* frsqrtex Floating Reciprocal Square Root Estimate |
1234 |
* .512 |
1235 |
*/ |
1236 |
void ppc_opc_frsqrtex() |
1237 |
{ |
1238 |
int frD, frA, frB, frC; |
1239 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1240 |
PPC_OPC_ASSERT(frA==0 && frC==0); |
1241 |
ppc_double B; |
1242 |
ppc_double D; |
1243 |
ppc_double E; |
1244 |
ppc_double Q; |
1245 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1246 |
ppc_fpu_sqrt(Q, B); |
1247 |
E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL; |
1248 |
ppc_fpu_div(D, E, Q); |
1249 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1250 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1251 |
// update cr1 flags |
1252 |
PPC_FPU_ERR("frsqrte.\n"); |
1253 |
} |
1254 |
} |
1255 |
/* |
1256 |
* fselx Floating Select |
1257 |
* .514 |
1258 |
*/ |
1259 |
void ppc_opc_fselx() |
1260 |
{ |
1261 |
int frD, frA, frB, frC; |
1262 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1263 |
ppc_double A; |
1264 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1265 |
if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) { |
1266 |
gCPU.fpr[frD] = gCPU.fpr[frB]; |
1267 |
} else { |
1268 |
gCPU.fpr[frD] = gCPU.fpr[frC]; |
1269 |
} |
1270 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1271 |
// update cr1 flags |
1272 |
PPC_FPU_ERR("fsel.\n"); |
1273 |
} |
1274 |
} |
1275 |
/* |
1276 |
* fsqrtx Floating Square Root (Double-Precision) |
1277 |
* .515 |
1278 |
*/ |
1279 |
void ppc_opc_fsqrtx() |
1280 |
{ |
1281 |
int frD, frA, frB, frC; |
1282 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1283 |
PPC_OPC_ASSERT(frA==0 && frC==0); |
1284 |
ppc_double B; |
1285 |
ppc_double D; |
1286 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1287 |
ppc_fpu_sqrt(D, B); |
1288 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1289 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1290 |
// update cr1 flags |
1291 |
PPC_FPU_ERR("fsqrt.\n"); |
1292 |
} |
1293 |
} |
1294 |
/* |
1295 |
* fsqrtsx Floating Square Root Single |
1296 |
* .515 |
1297 |
*/ |
1298 |
void ppc_opc_fsqrtsx() |
1299 |
{ |
1300 |
int frD, frA, frB, frC; |
1301 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1302 |
PPC_OPC_ASSERT(frA==0 && frC==0); |
1303 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1304 |
// update cr1 flags |
1305 |
PPC_FPU_ERR("fsqrts.\n"); |
1306 |
} |
1307 |
PPC_FPU_ERR("fsqrts\n"); |
1308 |
} |
1309 |
/* |
1310 |
* fsubx Floating Subtract (Double-Precision) |
1311 |
* .517 |
1312 |
*/ |
1313 |
void ppc_opc_fsubx() |
1314 |
{ |
1315 |
int frD, frA, frB, frC; |
1316 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1317 |
PPC_OPC_ASSERT(frC==0); |
1318 |
ppc_double A, B, D; |
1319 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1320 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1321 |
if (B.type != ppc_fpr_NaN) { |
1322 |
B.s ^= 1; |
1323 |
} |
1324 |
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1325 |
gCPU.fpscr |= FPSCR_VXISI; |
1326 |
} |
1327 |
ppc_fpu_add(D, A, B); |
1328 |
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1329 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1330 |
// update cr1 flags |
1331 |
PPC_FPU_ERR("fsub.\n"); |
1332 |
} |
1333 |
} |
1334 |
/* |
1335 |
* fsubsx Floating Subtract Single |
1336 |
* .518 |
1337 |
*/ |
1338 |
void ppc_opc_fsubsx() |
1339 |
{ |
1340 |
int frD, frA, frB, frC; |
1341 |
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1342 |
PPC_OPC_ASSERT(frC==0); |
1343 |
ppc_double A, B, D; |
1344 |
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1345 |
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1346 |
if (B.type != ppc_fpr_NaN) { |
1347 |
B.s ^= 1; |
1348 |
} |
1349 |
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1350 |
gCPU.fpscr |= FPSCR_VXISI; |
1351 |
} |
1352 |
ppc_fpu_add(D, A, B); |
1353 |
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1354 |
if (gCPU.current_opc & PPC_OPC_Rc) { |
1355 |
// update cr1 flags |
1356 |
PPC_FPU_ERR("fsubs.\n"); |
1357 |
} |
1358 |
} |
1359 |
|